├── .gitignore ├── tools ├── Makefile ├── bin │ └── replacetoken └── replacetoken.c ├── projects ├── test │ ├── cpp │ │ ├── obj │ │ │ └── main │ │ ├── Makefile │ │ └── main.cpp │ ├── replace_inout_token.sh │ ├── Makefile │ └── HwMain.bsv ├── image_proc │ ├── edge.dat │ ├── example.dat │ ├── output.dat │ ├── output.png │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── output2png.py │ ├── replace_inout_token.sh │ ├── HwMain.bsv │ └── Makefile ├── rv32i │ ├── sw │ │ ├── pipesafe.bin │ │ ├── microbench.bin │ │ ├── minisudoku.bin │ │ ├── pipeunsafe1.bin │ │ ├── pipeunsafe2.bin │ │ ├── pipeunsafe1.s │ │ ├── pipeunsafe2.s │ │ ├── pipesafe.s │ │ ├── pipeunsafe1.dump │ │ ├── pipeunsafe2.dump │ │ ├── pipesafe.dump │ │ ├── microbench.s │ │ ├── minisudoku.c │ │ └── microbench.dump │ ├── processor │ │ ├── Defines.bsv │ │ ├── BranchPredictor.bsv │ │ ├── RFile.bsv │ │ ├── Execute.bsv │ │ ├── Scoreboard.bsv │ │ ├── Processor.bsv │ │ └── Decode.bsv │ ├── Makefile │ ├── cpp │ │ └── main.cpp │ └── Top.bsv ├── matrix4x4 │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── replace_inout_token.sh │ ├── Makefile │ └── HwMain.bsv ├── nn_fc │ ├── cpp │ │ ├── Makefile │ │ ├── nn_fc.h │ │ ├── nn_fc.cpp │ │ └── main.cpp │ ├── replace_inout_token.sh │ ├── Makefile │ ├── HwMain.bsv │ └── NnFc.bsv ├── nn_fc_zfpe │ ├── cpp │ │ ├── Makefile │ │ ├── nn_fc.h │ │ └── nn_fc.cpp │ ├── replace_inout_token.sh │ ├── ZfpCompress.bsv │ ├── ZfpDecompress.bsv │ ├── NnFc.bsv │ └── HwMain.bsv └── nn_fc_accel │ ├── cpp │ ├── Makefile │ ├── nn_fc.h │ └── nn_fc.cpp │ ├── replace_inout_token.sh │ ├── Makefile │ ├── ZfpDecompress.bsv │ └── NnFc.bsv ├── src ├── verilog │ ├── inout16.v │ ├── mult18x18d.v │ ├── pll_fastclk.v │ └── sdram.v ├── bsv_verilog │ ├── RevertReg.v │ ├── SyncResetA.v │ ├── MakeResetA.v │ ├── FIFO1.v │ ├── BRAM2.v │ ├── ClockDiv.v │ └── FIFO2.v ├── cpp │ ├── ttyifc.h │ ├── bdpiSdram.cpp │ └── ttyifc.cpp ├── PLL.bsv ├── BRAMSubWord.bsv ├── Top.bsv ├── Mult18x18D.bsv ├── Uart.bsv ├── SimpleFloat.bsv └── Sdram.bsv └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | bsim 3 | build 4 | *.log 5 | *.txt 6 | *.o 7 | obj 8 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | mkdir -p bin 3 | g++ -o bin/replacetoken replacetoken.c -Wall -pedantic 4 | -------------------------------------------------------------------------------- /tools/bin/replacetoken: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/tools/bin/replacetoken -------------------------------------------------------------------------------- /projects/test/cpp/obj/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/test/cpp/obj/main -------------------------------------------------------------------------------- /projects/image_proc/edge.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/image_proc/edge.dat -------------------------------------------------------------------------------- /projects/image_proc/example.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/image_proc/example.dat -------------------------------------------------------------------------------- /projects/image_proc/output.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/image_proc/output.dat -------------------------------------------------------------------------------- /projects/image_proc/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/image_proc/output.png -------------------------------------------------------------------------------- /projects/rv32i/sw/pipesafe.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/rv32i/sw/pipesafe.bin -------------------------------------------------------------------------------- /projects/rv32i/sw/microbench.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/rv32i/sw/microbench.bin -------------------------------------------------------------------------------- /projects/rv32i/sw/minisudoku.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/rv32i/sw/minisudoku.bin -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/rv32i/sw/pipeunsafe1.bin -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sangwoojun/ulx3s_bsv/HEAD/projects/rv32i/sw/pipeunsafe2.bin -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe1.s: -------------------------------------------------------------------------------- 1 | .text 2 | addi t1, zero, 1024 3 | addi s1, zero, 1 4 | addi s2, zero, 2 5 | add s3, s1, s2 6 | sw s3, 0(t1) 7 | unimp 8 | 9 | 10 | .data 11 | 12 | -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe2.s: -------------------------------------------------------------------------------- 1 | .text 2 | addi t1, zero, 1024 3 | la t0, testdata 4 | lw s1, 0(t0) 5 | lw s2, 4(t0) 6 | add s3, s1, s2 7 | sw s3, 0(t1) 8 | unimp 9 | 10 | 11 | .data 12 | testdata: 13 | .word 1, 2, 3 14 | -------------------------------------------------------------------------------- /projects/test/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main main.cpp $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/image_proc/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main main.cpp $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/matrix4x4/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main main.cpp $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/nn_fc/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main $(wildcard *.cpp) $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/rv32i/sw/pipesafe.s: -------------------------------------------------------------------------------- 1 | .text 2 | addi t1, zero, 1024 3 | addi s1, zero, 1 4 | addi s2, zero, 2 5 | nop 6 | nop 7 | nop 8 | nop 9 | nop 10 | nop 11 | nop 12 | nop 13 | add s3, s1, s2 14 | nop 15 | nop 16 | nop 17 | nop 18 | nop 19 | nop 20 | nop 21 | nop 22 | sw s3, 0(t1) 23 | unimp 24 | 25 | 26 | .data 27 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main $(wildcard *.cpp) $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../../ 2 | BSVPATH=$(ROOTDIR)/src/ 3 | CPPPATH=$(BSVPATH)/cpp/ 4 | 5 | 6 | all: $(wildcard *.cpp) $(wildcard *.h) $(wildcard $(CPPPATH)/*.cpp) $(wildcard $(CPPPATH)/*.h) 7 | mkdir -p obj 8 | g++ -o obj/main $(wildcard *.cpp) $(CPPPATH)/ttyifc.cpp -lpthread -Wall -pedantic -DSYNTH 9 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/cpp/nn_fc.h: -------------------------------------------------------------------------------- 1 | #ifndef __NN_FC_H__ 2 | #define __NN_FC_H__ 3 | 4 | #include 5 | #include 6 | 7 | void nn_fc(uint8_t* comp_weights, uint8_t* comp_bias, uint8_t* comp_inputs, uint8_t* comp_add, size_t input_cnt, size_t input_dim, size_t output_dim, uint8_t* comp_outputs); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/verilog/inout16.v: -------------------------------------------------------------------------------- 1 | module inout16 ( 2 | input wire clk, 3 | inout wire [15:0] inout_pins, 4 | 5 | input wire [15:0] write_data, 6 | output wire [15:0] read_data, 7 | input wire write_req 8 | ); 9 | 10 | assign inout_pins = (write_req ? write_data : 16'hzzzz); 11 | assign read_data = (write_req ? 16'hxxxx : inout_pins); 12 | endmodule 13 | -------------------------------------------------------------------------------- /projects/nn_fc/cpp/nn_fc.h: -------------------------------------------------------------------------------- 1 | #ifndef __NN_FC_H__ 2 | #define __NN_FC_H__ 3 | 4 | #include 5 | 6 | void nn_fc(float* matrix, float* input, int input_cnt, int input_dim, int output_dim, float* answer); 7 | 8 | typedef struct { 9 | float value; 10 | int input_idx; 11 | int output_idx; 12 | bool valid; 13 | } FC_Result; 14 | 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /projects/rv32i/processor/Defines.bsv: -------------------------------------------------------------------------------- 1 | typedef Bit#(32) Word; 2 | typedef Bit#(5) RIndx; 3 | typedef Bit#(2) SizeType; 4 | typedef enum {Fetch, Decode, Execute, Writeback} ProcStage deriving (Eq,Bits); 5 | 6 | typedef struct { 7 | Bit#(16) addr; 8 | Word word; 9 | SizeType bytes; // 1-index. value of 0 means 1 byte 10 | Bool write; 11 | } MemReq32 deriving (Bits); 12 | 13 | -------------------------------------------------------------------------------- /projects/image_proc/output2png.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from PIL import Image 4 | image_out = Image.new(mode="RGB",size=(512,256)) 5 | data_out = open("output.dat", mode="rb") 6 | for pidx in range(0,256*512): 7 | pix = int.from_bytes(data_out.read(1), "little"); 8 | image_out.putpixel((pidx % 512, 255-int(pidx/512)), (pix,pix,pix)); 9 | 10 | image_out.save('output.png') 11 | image_out.show() 12 | -------------------------------------------------------------------------------- /src/bsv_verilog/RevertReg.v: -------------------------------------------------------------------------------- 1 | 2 | `ifdef BSV_ASSIGNMENT_DELAY 3 | `else 4 | `define BSV_ASSIGNMENT_DELAY 5 | `endif 6 | 7 | module RevertReg(CLK, Q_OUT, D_IN, EN); 8 | 9 | parameter width = 1; 10 | parameter init = { width {1'b0} } ; 11 | 12 | input CLK; 13 | input EN; 14 | input [width - 1 : 0] D_IN; 15 | output [width - 1 : 0] Q_OUT; 16 | 17 | assign Q_OUT = init; 18 | endmodule 19 | -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe1.dump: -------------------------------------------------------------------------------- 1 | 2 | ././obj//binary.elf: file format elf32-littleriscv 3 | 4 | 5 | Disassembly of section .text: 6 | 7 | 00000000 <.text>: 8 | 0: 40000313 addi x6,x0,1024 9 | 4: 00100493 addi x9,x0,1 10 | 8: 00200913 addi x18,x0,2 11 | c: 012489b3 add x19,x9,x18 12 | 10: 01332023 sw x19,0(x6) 13 | 14: c0001073 unimp 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bluespec environment and libraries for Lattice ECP5, Specifically the ULX3S board 2 | 3 | UART, Mult18x18D, PLL, SDRAM access imported to Bluespec 4 | 5 | Provides simplified floating point library for Single precision floats 6 | 7 | ## Getting started 8 | 9 | First, go to "./tools" and run "make. 10 | The replacetoken utility is required to modify bluespec-generated verilog code to be compatible with yosys 11 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/cpp/nn_fc.h: -------------------------------------------------------------------------------- 1 | #ifndef __NN_FC_H__ 2 | #define __NN_FC_H__ 3 | 4 | #include 5 | #include 6 | 7 | void nn_fc(uint8_t* comp_weights, uint8_t* comp_bias, uint8_t* comp_inputs, uint8_t* comp_add, size_t input_cnt, size_t input_dim, size_t output_dim, float* answer); 8 | 9 | typedef struct { 10 | float value; 11 | int input_idx; 12 | int output_idx; 13 | bool valid; 14 | } FC_Result; 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /projects/rv32i/sw/pipeunsafe2.dump: -------------------------------------------------------------------------------- 1 | 2 | ././obj//binary.elf: file format elf32-littleriscv 3 | 4 | 5 | Disassembly of section .text: 6 | 7 | 00000000 <.text>: 8 | 0: 40000313 addi x6,x0,1024 9 | 4: 00001297 auipc x5,0x1 10 | 8: ffc28293 addi x5,x5,-4 # 1000 11 | c: 0002a483 lw x9,0(x5) 12 | 10: 0042a903 lw x18,4(x5) 13 | 14: 012489b3 add x19,x9,x18 14 | 18: 01332023 sw x19,0(x6) 15 | 1c: c0001073 unimp 16 | -------------------------------------------------------------------------------- /src/cpp/ttyifc.h: -------------------------------------------------------------------------------- 1 | #ifndef __TTYIFC_H__ 2 | #define __TTYIFC_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | void set_tty_attributes(int fd, int baud); 16 | extern "C" uint32_t bdpiUartGet(uint8_t idx); 17 | extern "C" void bdpiUartPut(uint32_t d); 18 | uint32_t uart_recv(); 19 | void uart_send(uint8_t data); 20 | int open_tty(char* path); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/cpp/bdpiSdram.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | #define SDRAM_BYTES (1024*1024*32) 7 | bool sdram_init_ready = false; 8 | uint16_t* sdram_buffer = NULL; 9 | 10 | extern "C" void bdpiWriteSdram(uint32_t addr, uint32_t data) { 11 | if ( !sdram_init_ready ) { 12 | sdram_buffer = (uint16_t*)malloc(SDRAM_BYTES); 13 | sdram_init_ready = true; 14 | } 15 | sdram_buffer[addr] = data; 16 | } 17 | 18 | extern "C" uint32_t bdpiReadSdram(uint32_t addr) { 19 | if ( !sdram_init_ready ) { 20 | sdram_buffer = (uint16_t*)malloc(SDRAM_BYTES); 21 | sdram_init_ready = true; 22 | } 23 | return sdram_buffer[addr]; 24 | } 25 | -------------------------------------------------------------------------------- /projects/nn_fc/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /projects/test/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /projects/image_proc/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /projects/matrix4x4/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/replace_inout_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(mem_xx_inout16_XX_inout_pins)," "sdram_d," "mem_xx_inout16_XX_inout_pins" "sdram_d" > build/mkTop.v.new 3 | mv build/mkTop.v.new build/mkTop.v 4 | 5 | 6 | 7 | #cat build/mkTop.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(sdram_XX_sdram_d_XX)," "sdram_d," "sdram_XX_sdram_d_XX" "sdram_d" ".XX_sdram_d_XX(" ".sdram_d(" > build/mkTop.v.new 8 | #cat build/mkUlx3sSdram.v | ../../tools/bin/replacetoken ".XX_sdram_d_XX(xx_inout16_XX_inout_pins)," "sdram_d," "xx_inout16_XX_inout_pins" "sdram_d" > build/mkUlx3sSdram.v.new 9 | #mv build/mkUlx3sSdram.v.new build/mkUlx3sSdram.v 10 | -------------------------------------------------------------------------------- /src/PLL.bsv: -------------------------------------------------------------------------------- 1 | package PLL; 2 | 3 | interface PLLIfc; 4 | interface Clock clk_125mhz; 5 | interface Clock clk_100mhz; 6 | interface Clock clk_25mhz; 7 | interface Reset rst_25mhz; 8 | interface Reset rst_125mhz; 9 | interface Reset rst_100mhz; 10 | endinterface 11 | 12 | import "BVI" pll_fastclk = 13 | module mkPllFast#(Clock clk) (PLLIfc); 14 | default_clock no_clock; 15 | default_reset no_reset; 16 | 17 | input_clock (clki_25mhz) = clk; 18 | 19 | output_clock clk_25mhz(clk_25mhz); 20 | output_clock clk_125mhz(clk_125mhz); 21 | output_clock clk_100mhz(clk_100mhz); 22 | output_reset rst_25mhz(lockedn) clocked_by(clk_25mhz); 23 | output_reset rst_125mhz(lockedn) clocked_by(clk_125mhz); 24 | output_reset rst_100mhz(lockedn) clocked_by(clk_100mhz); 25 | endmodule 26 | 27 | endpackage: PLL 28 | -------------------------------------------------------------------------------- /projects/rv32i/processor/BranchPredictor.bsv: -------------------------------------------------------------------------------- 1 | import Vector::*; 2 | import Defines::*; 3 | 4 | interface BranchPredictorIfc; 5 | method Word getNextPc(Word curpc); 6 | method Action setPrediction(Word curpc, Word nextpc); 7 | endinterface 8 | 9 | typedef 4 TableSize; 10 | 11 | module mkBranchPredictor(BranchPredictorIfc); 12 | Reg#(Vector#(TableSize, Bool)) bht <- mkReg(replicate(False)); 13 | Reg#(Vector#(TableSize, Word)) btb <- mkReg(replicate(0)); 14 | method Word getNextPc(Word curpc); 15 | Word r = curpc + 4; 16 | Bit#(TLog#(TableSize)) idx = truncate(curpc); 17 | if ( bht[idx] ) r = btb[idx]; 18 | 19 | return curpc + 4; 20 | endmethod 21 | method Action setPrediction(Word curpc, Word nextpc); 22 | Bit#(TLog#(TableSize)) idx = truncate(curpc); 23 | bht[idx] <= True; 24 | btb[idx] <= nextpc; 25 | endmethod 26 | endmodule 27 | -------------------------------------------------------------------------------- /src/verilog/mult18x18d.v: -------------------------------------------------------------------------------- 1 | module mult18x18d (dataout, dataax, dataay, clk, rstn); 2 | output [35:0] dataout; 3 | input [17:0] dataax, dataay; 4 | input clk,rstn; 5 | reg [35:0] dataout; 6 | reg [17:0] dataax_reg, dataay_reg; 7 | wire [35:0] dataout_node; 8 | reg [35:0] dataout_reg; 9 | always @(posedge clk or negedge rstn) 10 | begin 11 | if (!rstn) 12 | begin 13 | dataax_reg <= 0; 14 | dataay_reg <= 0; 15 | end 16 | else 17 | begin 18 | dataax_reg <= dataax; 19 | dataay_reg <= dataay; 20 | end 21 | end 22 | assign dataout_node = dataax_reg * dataay_reg; 23 | always @(posedge clk or negedge rstn) 24 | begin 25 | if (!rstn) 26 | dataout_reg <= 0; 27 | else 28 | dataout_reg <= dataout_node; 29 | end 30 | always @(posedge clk or negedge rstn) 31 | begin 32 | if (!rstn) 33 | dataout <= 0; 34 | else 35 | dataout <= dataout_reg; 36 | end 37 | endmodule 38 | -------------------------------------------------------------------------------- /projects/rv32i/sw/pipesafe.dump: -------------------------------------------------------------------------------- 1 | 2 | ././obj//binary.elf: file format elf32-littleriscv 3 | 4 | 5 | Disassembly of section .text: 6 | 7 | 00000000 <.text>: 8 | 0: 40000313 addi x6,x0,1024 9 | 4: 00100493 addi x9,x0,1 10 | 8: 00200913 addi x18,x0,2 11 | c: 00000013 addi x0,x0,0 12 | 10: 00000013 addi x0,x0,0 13 | 14: 00000013 addi x0,x0,0 14 | 18: 00000013 addi x0,x0,0 15 | 1c: 00000013 addi x0,x0,0 16 | 20: 00000013 addi x0,x0,0 17 | 24: 00000013 addi x0,x0,0 18 | 28: 00000013 addi x0,x0,0 19 | 2c: 012489b3 add x19,x9,x18 20 | 30: 00000013 addi x0,x0,0 21 | 34: 00000013 addi x0,x0,0 22 | 38: 00000013 addi x0,x0,0 23 | 3c: 00000013 addi x0,x0,0 24 | 40: 00000013 addi x0,x0,0 25 | 44: 00000013 addi x0,x0,0 26 | 48: 00000013 addi x0,x0,0 27 | 4c: 00000013 addi x0,x0,0 28 | 50: 01332023 sw x19,0(x6) 29 | 54: c0001073 unimp 30 | -------------------------------------------------------------------------------- /src/verilog/pll_fastclk.v: -------------------------------------------------------------------------------- 1 | module pll_fastclk( 2 | input clki_25mhz, 3 | output clk_25mhz, 4 | output clk_125mhz, 5 | output clk_100mhz, 6 | output lockedn 7 | ); 8 | wire clkfb; 9 | wire locked; 10 | assign lockedn = !locked; 11 | (* ICP_CURRENT="12" *) (* LPF_RESISTOR="8" *) (* MFG_ENABLE_FILTEROPAMP="1" *) (* MFG_GMCREF_SEL="2" *) 12 | EHXPLLL #( 13 | .PLLRST_ENA("DISABLED"), 14 | .INTFB_WAKE("DISABLED"), 15 | .STDBY_ENABLE("DISABLED"), 16 | .DPHASE_SOURCE("DISABLED"), 17 | .CLKOP_FPHASE(0), 18 | .CLKOP_CPHASE(0), 19 | .OUTDIVIDER_MUXA("DIVA"), 20 | .CLKOP_ENABLE("ENABLED"), 21 | .CLKOP_DIV(4), 22 | .CLKOS_ENABLE("ENABLED"), 23 | .CLKOS_DIV(5), 24 | .CLKOS_CPHASE(0), 25 | .CLKOS_FPHASE(0), 26 | .CLKOS2_ENABLE("ENABLED"), 27 | .CLKOS2_DIV(20), 28 | .CLKOS2_CPHASE(0), 29 | .CLKOS2_FPHASE(0), 30 | .CLKFB_DIV(10), 31 | .CLKI_DIV(1), 32 | .FEEDBK_PATH("INT_OP") 33 | ) pll_i ( 34 | .CLKI(clki), 35 | .CLKFB(clkfb), 36 | .CLKINTFB(clkfb), 37 | .CLKOP(clk_125mhz), 38 | .CLKOS(clk_100mhz), 39 | .CLKOS2(clk_25mhz), 40 | .RST(1'b0), 41 | .STDBY(1'b0), 42 | .PHASESEL0(1'b0), 43 | .PHASESEL1(1'b0), 44 | .PHASEDIR(1'b0), 45 | .PHASESTEP(1'b0), 46 | .PLLWAKESYNC(1'b0), 47 | .ENCLKOP(1'b0), 48 | .LOCK(locked) 49 | ); 50 | endmodule 51 | 52 | -------------------------------------------------------------------------------- /projects/rv32i/processor/RFile.bsv: -------------------------------------------------------------------------------- 1 | import Vector::*; 2 | import BRAM::*; 3 | import RegFile::*; 4 | 5 | typedef Bit#(32) Word; 6 | typedef Bit#(5) RIndx; 7 | 8 | // Register File 9 | 10 | interface RFile2R1W; 11 | method Word rd1(RIndx rindx); 12 | method Word rd2(RIndx rindx); 13 | method Action wr(RIndx rindx, Word data); 14 | // simulation-only debugging method 15 | method Action displayRFileInSimulation; 16 | endinterface 17 | 18 | module mkRFile2R1W(RFile2R1W); 19 | Vector#(32, Reg#(Word)) rfile <- replicateM(mkReg(0)); 20 | 21 | method Word rd1(RIndx rindx); 22 | return rfile[rindx]; 23 | endmethod 24 | method Word rd2(RIndx rindx); 25 | return rfile[rindx]; 26 | endmethod 27 | method Action wr(RIndx rindx, Word data); 28 | if (rindx != 0) begin 29 | rfile[rindx] <= data; 30 | end 31 | endmethod 32 | // simulation-only debugging method 33 | method Action displayRFileInSimulation; 34 | for (Integer i = 0 ; i < 32 ; i = i+1) begin 35 | $display("x%0d = 0x%x", i, rfile[i]); 36 | end 37 | $write("{x31, ..., x0} = 0x"); 38 | for (Integer i = 31 ; i >= 0 ; i = i-1) begin 39 | $write("%x", rfile[i]); 40 | end 41 | $write("\n"); 42 | endmethod 43 | endmodule 44 | 45 | // Memory 46 | -------------------------------------------------------------------------------- /projects/rv32i/sw/microbench.s: -------------------------------------------------------------------------------- 1 | .text 2 | 3 | # mult safe 4 | addi t0, zero, 2 5 | addi t1, zero, 3 6 | addi t2, zero, 7 7 | nop 8 | nop 9 | nop 10 | nop 11 | nop 12 | mul t0, t0, t1 13 | nop 14 | nop 15 | nop 16 | nop 17 | nop 18 | add t1, t2, t1 19 | nop 20 | nop 21 | nop 22 | nop 23 | nop 24 | mul t0, t0, t1 25 | nop 26 | nop 27 | nop 28 | nop 29 | nop 30 | addi t3, zero, 1024 31 | nop 32 | nop 33 | nop 34 | nop 35 | nop 36 | sw t0, 0(t3) 37 | 38 | # branch pipe safe 39 | branch: 40 | 41 | addi t0, zero, 1025 42 | addi t1, zero, 3 43 | addi t2, zero, 3 44 | addi t3, zero, 0 45 | nop 46 | nop 47 | nop 48 | nop 49 | nop 50 | nop 51 | 52 | beq t1, t2, skip 53 | 54 | sw t3, 0(t0) 55 | j multunsafe 56 | 57 | skip: 58 | sw t2, 0(t0) 59 | #unimp 60 | 61 | multunsafe: 62 | 63 | # mult pipe unsafe 64 | li t0, 2 65 | li t1, 7 66 | li t2, 17 67 | li t3, 5 68 | addi t2, t2, -4 69 | mul t2, t1, t2 # 7*13=91 70 | add t0, t3, t0 # 2+5=7 71 | sub t0, t2, t0 # 91-7 = 84 72 | addi t3, zero, 1026 73 | sw t0, 0(t3) 74 | 75 | 76 | # pip3 unsafe 2 77 | addi t1, zero, 1027 78 | la t0, testdata 79 | lw s1, 0(t0) 80 | lw s2, 4(t0) 81 | add s3, s1, s2 82 | sw s3, 0(t1) 83 | 84 | 85 | 86 | 87 | 88 | 89 | unimp 90 | 91 | 92 | .data 93 | testdata: 94 | .word 1, 2, 3 95 | 96 | 97 | 98 | #branch1.s 99 | #mult.s 100 | #pipesafe.s 101 | #pipeunsafe1.s 102 | #pipeunsafe2.s 103 | -------------------------------------------------------------------------------- /tools/replacetoken.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int 6 | main(int argc, char** argv) { 7 | int paircnt = (argc-1)/2; 8 | char** fromtokens = (char**)malloc(paircnt*sizeof(char*)); 9 | char** totokens = (char**)malloc(paircnt*sizeof(char*)); 10 | for ( int i = 0; i < (argc-1)/2; i++ ) { 11 | fromtokens[i] = argv[1+i*2]; 12 | totokens[i] = argv[2+i*2]; 13 | } 14 | 15 | size_t rbufsz = 8192; 16 | char* rbf = (char*)malloc(sizeof(char)*rbufsz); 17 | size_t tbufsz = 8192; 18 | char* tbf = (char*)malloc(sizeof(char)*tbufsz); 19 | 20 | while(!feof(stdin)) { 21 | if ( getline(&rbf, &rbufsz, stdin) < 0 ) break; 22 | 23 | for ( int i = 0; i < paircnt; i++ ) { 24 | char* pf = strstr(rbf, fromtokens[i]); 25 | if ( !pf ) continue; 26 | 27 | //printf( ">> %s", rbf ); 28 | if (strlen(rbf) + strlen(totokens[i]) - strlen(fromtokens[i]) >= rbufsz) { 29 | size_t newdiff = strlen(totokens[i]) - strlen(fromtokens[i]); 30 | rbf = (char*)realloc(rbf, rbufsz+newdiff); 31 | rbufsz += newdiff; 32 | } 33 | if (rbufsz != tbufsz) { 34 | tbf = (char*)realloc(tbf, rbufsz); 35 | tbufsz = rbufsz; 36 | } 37 | 38 | int offset = pf-rbf; 39 | strncpy(tbf, rbf, offset); 40 | 41 | strcpy(tbf+offset + strlen(totokens[i]), pf + strlen(fromtokens[i])); 42 | strncpy(tbf+offset, totokens[i], strlen(totokens[i])); 43 | strcpy(rbf,tbf); 44 | //printf( "<< %s", tbf ); 45 | } 46 | printf( "%s", rbf ); 47 | 48 | } 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /projects/image_proc/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "../../../src/cpp/ttyifc.h" 10 | 11 | 12 | void* swmain(void* param) { 13 | 14 | FILE* fin = fopen("example.dat", "rb"); 15 | FILE* fout = fopen("output.dat", "wb"); 16 | if ( fin == NULL ) { 17 | printf( "Input file not found!\n" ); 18 | exit(0); 19 | } 20 | if ( fout == NULL ) { 21 | printf( "Output file failed to open\n" ); 22 | exit(0); 23 | } 24 | 25 | printf( "Starting image processing!\n" ); 26 | 27 | int pixcount = 0; 28 | int writecnt = 0; 29 | while (!feof(fin)) { 30 | uint8_t pix; 31 | size_t r = fread(&pix, sizeof(uint8_t), 1, fin); 32 | if ( r != 1 ) continue; 33 | uart_send(pix); 34 | 35 | pixcount++; 36 | 37 | uint32_t rcheck = uart_recv(); 38 | while (rcheck <= 0xff) { 39 | fwrite(&rcheck, sizeof(uint8_t), 1, fout); 40 | writecnt++; 41 | rcheck = uart_recv(); 42 | } 43 | if (pixcount >= 512*256) break; 44 | } 45 | 46 | while (writecnt < 256*512) { 47 | uint32_t rcheck = uart_recv(); 48 | while ( rcheck > 0xff ) { 49 | rcheck = uart_recv(); 50 | } 51 | fwrite(&rcheck, sizeof(uint8_t), 1, fout); 52 | writecnt++; 53 | } 54 | 55 | printf( "Image processing done! %d pixels\n", pixcount ); 56 | printf( "Output written to output.dat\n" ); 57 | exit(0); 58 | } 59 | 60 | int 61 | main() { 62 | int ret = open_tty("/dev/ttyUSB0"); 63 | if ( ret ) return ret; 64 | 65 | swmain(NULL); 66 | } 67 | -------------------------------------------------------------------------------- /projects/rv32i/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH):./processor 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= ./cpp/main.cpp 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: Top.bsv $(wildcard $(BSVPATH)/*.bsv) $(wildcard processor/*.bsv) cpp/main.cpp 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop ./Top.bsv 22 | #cp verilog/*.v build 23 | cp $(BSVPATH)/*.lpf build 24 | cp $(BSVPATH)/bsv_verilog/*.v build 25 | cd build; apio verify 26 | cd build; apio build -v 27 | 28 | bsim: Top.bsv $(wildcard $(BSVPATH)/*.bsv) $(wildcard processor/*.bsv) cpp/main.cpp 29 | mkdir -p $(BSIM_DIR) 30 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim ./Top.bsv 31 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 32 | 33 | runsim: bsim 34 | ./bsim/bsim 2> output.log | tee system.log 35 | 36 | program: 37 | cd build; apio upload 38 | 39 | clean: 40 | rm -rf build 41 | rm -rf bsim 42 | 43 | -------------------------------------------------------------------------------- /projects/nn_fc/cpp/nn_fc.cpp: -------------------------------------------------------------------------------- 1 | #include "nn_fc.h" 2 | 3 | extern void send_weight(float data); 4 | extern void send_input(float value, int input_idx); 5 | extern FC_Result recv_result(); 6 | 7 | 8 | void nn_fc(float* matrix, float* input, int input_cnt, int input_dim, int output_dim, float* answer) { 9 | 10 | int pe_ways = 16; 11 | 12 | for ( int i = 0; i < output_dim/pe_ways; i++ ) { 13 | for ( int j = 0; j < input_dim; j++ ) { 14 | for ( int k = 0; k < pe_ways; k++ ) { 15 | send_weight(matrix[(i*pe_ways+k)*input_dim + j]); 16 | } 17 | } 18 | } 19 | 20 | int done_cnt = 0; 21 | for ( int i = 0; i < input_cnt; i++ ) { 22 | for ( int j = 0; j < output_dim/pe_ways; j++ ) { 23 | for ( int k = 0; k < input_dim; k++ ) { 24 | send_input(input[i*input_dim + k], i); 25 | } 26 | 27 | FC_Result res = recv_result(); 28 | while (res.valid) { 29 | int idx = res.input_idx*output_dim + res.output_idx; 30 | answer[idx] = res.value; 31 | done_cnt++; 32 | printf( "Writing %f to mem %d <%d,%d> (%d)\n", res.value, idx, res.input_idx, res.output_idx, done_cnt ); 33 | fflush(stdout); 34 | res = recv_result(); 35 | } 36 | } 37 | } 38 | printf( "Finished sending all data\n" ); 39 | fflush(stdout); 40 | 41 | while (done_cnt < output_dim*input_cnt) { 42 | FC_Result res = recv_result(); 43 | if ( !res.valid ) continue; 44 | 45 | int idx = res.input_idx*output_dim + res.output_idx; 46 | answer[idx] = res.value; 47 | //printf( "Writing %f to mem %d <%d,%d> (%d)\n", res.value, idx, res.input_idx, res.output_idx, done_cnt ); 48 | fflush(stdout); 49 | done_cnt++; 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/cpp/nn_fc.cpp: -------------------------------------------------------------------------------- 1 | #include "nn_fc.h" 2 | 3 | extern void send_weight(uint8_t data); 4 | extern void send_input(uint8_t data, int input_idx); 5 | extern uint8_t recv_result(); 6 | 7 | 8 | void nn_fc(uint8_t* comp_weights, uint8_t* comp_bias, uint8_t* comp_inputs, uint8_t* comp_add, size_t input_cnt, size_t input_dim, size_t output_dim, uint8_t* comp_outputs) { 9 | 10 | int idx = 0; 11 | 12 | size_t done_cnt = 0; 13 | 14 | size_t comp_buffer_size = 5; 15 | size_t comp_input_cnt = (input_cnt/4)*comp_buffer_size; 16 | size_t comp_output_dim = (output_dim/4)*comp_buffer_size; 17 | 18 | size_t period = comp_output_dim/64; 19 | // Send compressed weights & bias 20 | for ( size_t i = 0; i < comp_output_dim/period; i++ ) { 21 | for ( size_t j = 0; j < input_dim; j++ ) { 22 | for ( size_t k = 0; k < period; k++ ) { 23 | send_weight(comp_weights[((i*period+k)*input_dim) + j]); 24 | } 25 | } 26 | for ( size_t l = (i)*period; l < (i+1)*period; l++ ) { 27 | send_weight(comp_bias[l]); 28 | } 29 | } 30 | // Send compressed inputs 31 | for ( size_t i = 0; i < input_dim; i++ ) { 32 | for ( size_t j = 0; j < comp_input_cnt; j++ ) { 33 | send_input(comp_inputs[j*input_dim + i], 0); 34 | } 35 | } 36 | // Send compressed additional values 37 | for ( size_t i = 0; i < comp_output_dim; i++ ) { 38 | send_input(comp_add[i], 0); 39 | } 40 | 41 | printf( "Finished sending all data\n" ); 42 | fflush(stdout); 43 | 44 | while (done_cnt < output_dim*input_cnt) { 45 | uint8_t res = recv_result(); 46 | 47 | comp_outputs[idx] = res; 48 | //printf( "Writing %f to mem %d <%d,%d> (%zd)\n", res.value, idx, res.input_idx, res.output_idx, done_cnt ); 49 | fflush(stdout); 50 | done_cnt ++; 51 | idx ++; 52 | //printf( "done cnt: %d\n", done_cnt ); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/cpp/nn_fc.cpp: -------------------------------------------------------------------------------- 1 | #include "nn_fc.h" 2 | 3 | extern void send_weight(uint8_t data); 4 | extern void send_input(uint8_t data, int input_idx); 5 | extern FC_Result recv_result(); 6 | 7 | 8 | void nn_fc(uint8_t* comp_weights, uint8_t* comp_bias, uint8_t* comp_inputs, uint8_t* comp_add, size_t input_cnt, size_t input_dim, size_t output_dim, float* outputs) { 9 | 10 | size_t done_cnt = 0; 11 | 12 | size_t comp_buffer_size = 5; 13 | size_t comp_input_cnt = (input_cnt/4)*comp_buffer_size; 14 | size_t comp_output_dim = (output_dim/4)*comp_buffer_size; 15 | 16 | size_t period = comp_output_dim/64; 17 | // Send compressed weights & bias 18 | for ( size_t i = 0; i < comp_output_dim/period; i++ ) { 19 | for ( size_t j = 0; j < input_dim; j++ ) { 20 | for ( size_t k = 0; k < period; k++ ) { 21 | send_weight(comp_weights[((i*period+k)*input_dim) + j]); 22 | } 23 | } 24 | for ( size_t l = (i)*period; l < (i+1)*period; l++ ) { 25 | send_weight(comp_bias[l]); 26 | } 27 | } 28 | // Send compressed inputs 29 | for ( size_t i = 0; i < input_dim; i++ ) { 30 | for ( size_t j = 0; j < comp_input_cnt; j++ ) { 31 | send_input(comp_inputs[j*input_dim + i], 0); 32 | } 33 | } 34 | // Send compressed additional values 35 | for ( size_t i = 0; i < comp_output_dim; i++ ) { 36 | send_input(comp_add[i], 0); 37 | } 38 | 39 | printf( "Finished sending all data\n" ); 40 | fflush(stdout); 41 | 42 | while (done_cnt < output_dim*input_cnt) { 43 | FC_Result res = recv_result(); 44 | if ( !res.valid ) continue; 45 | 46 | int idx = res.input_idx*output_dim + res.output_idx; 47 | outputs[idx] = res.value; 48 | //printf( "Writing %f to mem %d <%d,%d> (%zd)\n", res.value, idx, res.input_idx, res.output_idx, done_cnt ); 49 | fflush(stdout); 50 | done_cnt++; 51 | //printf( "done cnt: %d\n", done_cnt ); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /projects/image_proc/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import BRAM::*; 5 | import BRAMFIFO::*; 6 | import Uart::*; 7 | import Sdram::*; 8 | 9 | interface HwMainIfc; 10 | method ActionValue#(Bit#(8)) serial_tx; 11 | method Action serial_rx(Bit#(8) rx); 12 | endinterface 13 | 14 | module mkHwMain#(Ulx3sSdramUserIfc mem) (HwMainIfc); 15 | Clock curclk <- exposeCurrentClock; 16 | Reset currst <- exposeCurrentReset; 17 | 18 | Reg#(Bit#(32)) cycles <- mkReg(0); 19 | Reg#(Bit#(32)) cycleOutputStart <- mkReg(0); 20 | rule incCyclecount; 21 | cycles <= cycles + 1; 22 | endrule 23 | 24 | Reg#(Bit#(32)) cycleBegin <- mkReg(0); 25 | 26 | FIFO#(Bit#(8)) serialrxQ <- mkFIFO; 27 | FIFO#(Bit#(8)) serialtxQ <- mkFIFO; 28 | 29 | FIFO#(Bit#(8)) rowbufferQ1 <- mkSizedBRAMFIFO(512); 30 | FIFO#(Bit#(8)) rowbufferQ2 <- mkSizedBRAMFIFO(512); 31 | 32 | rule relayRow1; 33 | serialrxQ.deq; 34 | let pix = serialrxQ.first; 35 | rowbufferQ1.enq(pix); 36 | endrule 37 | 38 | rule relayRow2; 39 | rowbufferQ1.deq; 40 | let pix = rowbufferQ1.first; 41 | rowbufferQ2.enq(pix); 42 | endrule 43 | 44 | rule relayRow3; 45 | rowbufferQ2.deq; 46 | let pix = rowbufferQ2.first; 47 | serialtxQ.enq(pix); 48 | endrule 49 | 50 | Reg#(Bit#(32)) pixOutCnt <- mkReg(0); 51 | method ActionValue#(Bit#(8)) serial_tx; 52 | if ( cycleOutputStart == 0 ) begin 53 | $write( "Impage processing latency: %d cycles\n", cycles - cycleBegin ); 54 | cycleOutputStart <= cycles; 55 | end 56 | if ( pixOutCnt + 1 >= 512*256 ) begin 57 | $write( "Impage processing total cycles: %d\n", cycles - cycleBegin ); 58 | end 59 | pixOutCnt <= pixOutCnt + 1; 60 | serialtxQ.deq; 61 | return serialtxQ.first(); 62 | endmethod 63 | method Action serial_rx(Bit#(8) d); 64 | if ( cycleBegin == 0 ) cycleBegin <= cycles; 65 | serialrxQ.enq(d); 66 | endmethod 67 | endmodule 68 | -------------------------------------------------------------------------------- /projects/test/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH) 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(ROOTDIR)/cpp/*.cpp) hostsoft 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop $(BSVPATH)/Top.bsv 22 | cp $(BSVPATH)/*.lpf build 23 | cp $(BSVPATH)/bsv_verilog/*.v build 24 | cp $(BSVPATH)/verilog/*.v build 25 | bash replace_inout_token.sh 26 | #cd build; apio verify 27 | cd build; apio build -v 28 | 29 | hostsoft: $(wildcard ./cpp/*.cpp) $(wildcard ./cpp/*.h) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 30 | make -C ./cpp 31 | 32 | bsim: $(wildcard ./*bsv) $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 33 | mkdir -p $(BSIM_DIR) 34 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim $(BSVPATH)/Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 36 | 37 | runsim: bsim 38 | ./bsim/bsim 2> output.log | tee system.log 39 | 40 | program: 41 | cd build; apio upload 42 | 43 | clean: 44 | rm -rf build 45 | rm -rf bsim 46 | 47 | -------------------------------------------------------------------------------- /projects/matrix4x4/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH) 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(ROOTDIR)/cpp/*.cpp) hostsoft 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop $(BSVPATH)/Top.bsv 22 | cp $(BSVPATH)/*.lpf build 23 | cp $(BSVPATH)/bsv_verilog/*.v build 24 | cp $(BSVPATH)/verilog/*.v build 25 | bash replace_inout_token.sh 26 | #cd build; apio verify 27 | cd build; apio build -v 28 | 29 | hostsoft: $(wildcard ./cpp/*.cpp) $(wildcard ./cpp/*.h) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 30 | make -C ./cpp 31 | 32 | bsim: $(wildcard ./*bsv) $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 33 | mkdir -p $(BSIM_DIR) 34 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim $(BSVPATH)/Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 36 | 37 | runsim: bsim 38 | ./bsim/bsim 2> output.log | tee system.log 39 | 40 | program: 41 | cd build; apio upload 42 | 43 | clean: 44 | rm -rf build 45 | rm -rf bsim 46 | 47 | -------------------------------------------------------------------------------- /projects/nn_fc/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH) 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(ROOTDIR)/cpp/*.cpp) hostsoft 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop $(BSVPATH)/Top.bsv 22 | cp $(BSVPATH)/*.lpf build 23 | cp $(BSVPATH)/bsv_verilog/*.v build 24 | cp $(BSVPATH)/verilog/*.v build 25 | bash replace_inout_token.sh 26 | #cd build; apio verify 27 | cd build; apio build -v 28 | 29 | hostsoft: $(wildcard ./cpp/*.cpp) $(wildcard ./cpp/*.h) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 30 | make -C ./cpp 31 | 32 | bsim: $(wildcard ./*bsv) $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 33 | mkdir -p $(BSIM_DIR) 34 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim $(BSVPATH)/Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 36 | 37 | runsim: bsim 38 | ./bsim/bsim 2> output.log | tee system.log 39 | 40 | program: 41 | cd build; apio upload 42 | 43 | clean: 44 | rm -rf build 45 | rm -rf bsim 46 | 47 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH) 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(ROOTDIR)/cpp/*.cpp) hostsoft 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop $(BSVPATH)/Top.bsv 22 | cp $(BSVPATH)/*.lpf build 23 | cp $(BSVPATH)/bsv_verilog/*.v build 24 | cp $(BSVPATH)/verilog/*.v build 25 | bash replace_inout_token.sh 26 | #cd build; apio verify 27 | cd build; apio build -v 28 | 29 | hostsoft: $(wildcard ./cpp/*.cpp) $(wildcard ./cpp/*.h) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 30 | make -C ./cpp 31 | 32 | bsim: $(wildcard ./*bsv) $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 33 | mkdir -p $(BSIM_DIR) 34 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim $(BSVPATH)/Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 36 | 37 | runsim: bsim 38 | ./bsim/bsim 2> output.log | tee system.log 39 | 40 | program: 41 | cd build; apio upload 42 | 43 | clean: 44 | rm -rf build 45 | rm -rf bsim 46 | 47 | -------------------------------------------------------------------------------- /projects/image_proc/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=../../ 2 | 3 | BSCFLAGS = -show-schedule -show-range-conflict -aggressive-conditions 4 | BUILD_DIR=./build/ 5 | BSIM_DIR=./bsim/ 6 | BSVPATH=$(ROOTDIR)/src/ 7 | BSVPATH_ALL=$(BSVPATH) 8 | 9 | BSCFLAGS_SYNTH = -bdir $(BUILD_DIR) -vdir $(BUILD_DIR) -simdir $(BUILD_DIR) -info-dir $(BUILD_DIR) -fdir $(BUILD_DIR) 10 | 11 | BSCFLAGS_BSIM = -bdir $(BSIM_DIR) -vdir $(BSIM_DIR) -simdir $(BSIM_DIR) -info-dir $(BSIM_DIR) -fdir $(BSIM_DIR) -D BSIM -l pthread 12 | 13 | BSIM_CPPFILES= $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) 14 | #$(ROOTDIR)/cpp/PcieBdpi.cpp \ 15 | $(ROOTDIR)/cpp/ShmFifo.cpp 16 | 17 | all: $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(ROOTDIR)/cpp/*.cpp) hostsoft 18 | rm -rf build 19 | mkdir -p build 20 | cd $(BUILD_DIR); apio init -b ulx3s-85f -p . 21 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH_ALL) -verilog -u -g mkTop $(BSVPATH)/Top.bsv 22 | cp $(BSVPATH)/*.lpf build 23 | cp $(BSVPATH)/bsv_verilog/*.v build 24 | cp $(BSVPATH)/verilog/*.v build 25 | bash replace_inout_token.sh 26 | #cd build; apio verify 27 | cd build; apio build -v 28 | 29 | hostsoft: $(wildcard ./cpp/*.cpp) $(wildcard ./cpp/*.h) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 30 | make -C ./cpp 31 | 32 | bsim: $(wildcard ./*bsv) $(wildcard $(BSVPATH)/*.bsv) $(wildcard ./cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.cpp) $(wildcard $(BSVPATH)/cpp/*.h) 33 | mkdir -p $(BSIM_DIR) 34 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH_ALL) -sim -u -g mkTop_bsim $(BSVPATH)/Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkTop_bsim -o $(BSIM_DIR)/bsim $(BSIM_DIR)/*.ba $(BSIM_CPPFILES) 36 | 37 | runsim: bsim 38 | ./bsim/bsim 2> output.log | tee system.log 39 | python3 output2png.py 40 | 41 | program: 42 | cd build; apio upload 43 | 44 | clean: 45 | rm -rf build 46 | rm -rf bsim 47 | 48 | -------------------------------------------------------------------------------- /projects/test/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "../../../src/cpp/ttyifc.h" 10 | 11 | 12 | void* swmain(void* param) { 13 | 14 | uart_send(0xff); 15 | uart_send(0xbe); 16 | uart_send(0xef); 17 | uart_send(0); 18 | uart_send(0); 19 | uart_send(0); 20 | 21 | uart_send(0xff); 22 | uart_send(0xde); 23 | uart_send(0xad); 24 | uart_send(0); 25 | uart_send(0); 26 | uart_send(1); 27 | 28 | 29 | 30 | uart_send(0); 31 | uart_send(0xff); 32 | uart_send(0xff); 33 | uart_send(0); 34 | uart_send(0); 35 | uart_send(0); 36 | 37 | uart_send(0); 38 | uart_send(0xff); 39 | uart_send(0xff); 40 | uart_send(0); 41 | uart_send(0); 42 | uart_send(1); 43 | 44 | printf( "Sent!\n" ); 45 | 46 | uint32_t inbuf = 0; 47 | int inbufr = 1; 48 | while(true) { 49 | uint32_t c = uart_recv(); 50 | if ( c > 0xff ) continue; 51 | inbuf = inbuf | (c<<(inbufr*8)); 52 | if ( inbufr > 0 ) inbufr--; 53 | else { 54 | inbufr = 1; 55 | printf( "%x\n", inbuf ); 56 | inbuf = 0; 57 | fflush(stdout); 58 | } 59 | 60 | } 61 | 62 | 63 | 64 | 65 | 66 | exit(0); 67 | 68 | 69 | //float fd[3] = {0.2,4.8726, 1.12}; 70 | float fd[3] = {1,40.161865, 6}; 71 | uint32_t* fdi = (uint32_t*)fd; 72 | 73 | for ( int i = 0; i < 3; i++ ) { 74 | for ( int j = 3; j >= 0; j-- ) { 75 | uint32_t sval = fdi[i]>>(j*8); 76 | uart_send(sval&0xff); 77 | } 78 | } 79 | printf( "Sent all data\n" ); 80 | 81 | uint32_t ir = 0; 82 | int fc = 3; 83 | while(true) { 84 | uint32_t c = uart_recv(); 85 | if ( c > 0xff ) continue; 86 | ir = ir | (c<<(fc*8)); 87 | if ( fc > 0 ) fc--; 88 | else { 89 | fc = 3; 90 | printf( "%f\n", *(float*)&ir ); 91 | } 92 | 93 | } 94 | 95 | } 96 | 97 | int 98 | main() { 99 | int ret = open_tty("/dev/ttyUSB0"); 100 | if ( ret ) return ret; 101 | 102 | swmain(NULL); 103 | } 104 | -------------------------------------------------------------------------------- /projects/matrix4x4/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "../../../src/cpp/ttyifc.h" 10 | 11 | 12 | void* swmain(void* param) { 13 | float matrix_a[4][4]; 14 | float matrix_b[4][4]; 15 | float matrix_c[4][4]; 16 | float matrix_c_golden[4][4] = {0}; 17 | 18 | srand(time(NULL)); 19 | for ( int i = 0; i < 4; i++ ) { 20 | for ( int j = 0; j < 4; j++ ) { 21 | //matrix_a[i][j] = 2*i+j; 22 | matrix_a[i][j] = ((rand()%128)*1.0-64)/32; 23 | for ( int k = 0; k < 4; k++ ) { 24 | float* v = &(matrix_a[i][j]); 25 | uart_send(((uint8_t*)v)[k]); 26 | } 27 | } 28 | } 29 | for ( int i = 0; i < 4; i++ ) { 30 | for ( int j = 0; j < 4; j++ ) { 31 | //matrix_b[i][j] = 1.0+j; 32 | matrix_b[i][j] = ((rand()%128)*1.0-64)/32; 33 | for ( int k = 0; k < 4; k++ ) { 34 | float* v = &(matrix_b[i][j]); 35 | uart_send(((uint8_t*)v)[k]); 36 | } 37 | } 38 | } 39 | 40 | 41 | for ( int i = 0; i < 4; i++ ) { 42 | for ( int j = 0; j < 4; j++ ) { 43 | for ( int k = 0; k < 4; k++ ) { 44 | uint32_t rcheck = uart_recv(); 45 | while (rcheck > 0xff) rcheck = uart_recv(); 46 | 47 | uint8_t rv = *((uint8_t*)&rcheck); 48 | float* v = &(matrix_c[i][j]); 49 | ((uint8_t*)v)[k] = rv; 50 | 51 | matrix_c_golden[i][j] += matrix_a[i][k]*matrix_b[j][k]; 52 | } 53 | } 54 | } 55 | 56 | printf( "Results from accelerator:\n" ); 57 | for ( int i = 0; i < 4; i++ ) { 58 | for ( int j = 0; j < 4; j++ ) { 59 | printf( "%2.02f ", matrix_c[i][j] ); 60 | } 61 | printf("\n"); 62 | } 63 | printf("\n"); 64 | 65 | printf( "Results golden:\n" ); 66 | for ( int i = 0; i < 4; i++ ) { 67 | for ( int j = 0; j < 4; j++ ) { 68 | printf( "%2.02f ", matrix_c_golden[i][j] ); 69 | } 70 | printf("\n"); 71 | } 72 | printf("\n"); 73 | 74 | printf( "Finished execution!\n" ); 75 | fflush(stdout); 76 | exit(0); 77 | } 78 | 79 | int 80 | main() { 81 | int ret = open_tty("/dev/ttyUSB0"); 82 | if ( ret ) return ret; 83 | 84 | swmain(NULL); 85 | } 86 | -------------------------------------------------------------------------------- /projects/rv32i/processor/Execute.bsv: -------------------------------------------------------------------------------- 1 | // import Common::*; 2 | import Defines::*; 3 | import Decode::*; 4 | //import ALU::*; 5 | 6 | typedef struct { 7 | IType iType; 8 | RIndx dst; 9 | Bool writeDst; 10 | Word data; 11 | Word addr; 12 | Word nextPC; 13 | } ExecInst deriving (Bits, Eq, FShow); 14 | 15 | 16 | // ALU 17 | /////////////////////////////////////////////////////////////////////////// 18 | 19 | 20 | function Word alu(Word a, Word b, AluFunc func); 21 | 22 | Word res = case (func) 23 | Add: (a + b); 24 | Sub: (a - b); 25 | And: (a & b); 26 | Or: (a | b); 27 | Xor: (a ^ b); 28 | Slt: (signedLT(a, b) ? 1 : 0); 29 | Sltu: ((a < b) ? 1 : 0); 30 | Sll: (a << b[4:0]); 31 | Srl: (a >> b[4:0]); 32 | Sra: signedShiftRight(a, b[4:0]); 33 | endcase; 34 | return res; 35 | endfunction 36 | 37 | 38 | function Bool aluBr(Word a, Word b, BrFunc brFunc); 39 | Bool res = case (brFunc) 40 | Eq: (a == b); 41 | Neq: (a != b); 42 | Lt: signedLT(a, b); 43 | Ltu: (a < b); 44 | Ge: signedGE(a, b); 45 | Geu: (a >= b); 46 | AT: True; 47 | NT: False; 48 | endcase; 49 | return res; 50 | endfunction 51 | 52 | 53 | function ExecInst exec( DecodedInst dInst, Word rVal1, Word rVal2, Word pc ); 54 | let imm = dInst.imm; 55 | let brFunc = dInst.brFunc; 56 | let aluFunc = dInst.aluFunc; 57 | Word data = ?; 58 | Word nextPc = pc+4; 59 | Word addr = 0; 60 | case (dInst.iType) matches 61 | OP: begin data = alu(rVal1, rVal2, aluFunc); nextPc = pc+4; end 62 | OPIMM: begin data = alu(rVal1, imm, aluFunc); nextPc = pc+4; end 63 | BRANCH: begin nextPc = aluBr(rVal1, rVal2, brFunc) ? pc+imm : pc+4; end 64 | LUI: begin data = imm; end 65 | JAL: begin data = pc+4; nextPc = pc+imm; end 66 | JALR: begin data = pc+4; nextPc = (rVal1+imm) & ~1; end 67 | LOAD: begin addr = rVal1+imm; nextPc = pc+4; end 68 | STORE: begin data = rVal2; addr = rVal1+imm; nextPc = pc+4; end 69 | AUIPC: begin data = pc+imm; nextPc = pc+4; end 70 | endcase 71 | ExecInst eInst = ?; 72 | eInst.iType = dInst.iType; 73 | eInst.dst = dInst.dst; 74 | eInst.writeDst = dInst.writeDst; 75 | eInst.data = data; 76 | eInst.addr = addr; 77 | eInst.nextPC = nextPc; 78 | return eInst; 79 | endfunction 80 | -------------------------------------------------------------------------------- /projects/rv32i/processor/Scoreboard.bsv: -------------------------------------------------------------------------------- 1 | import Vector::*; 2 | import FIFOF::*; 3 | 4 | interface ScoreboardIfc#(numeric type cnt); 5 | method Action enq(Bit#(5) data); 6 | method Action deq; 7 | method Bool search1(Bit#(5) data); 8 | method Bool search2(Bit#(5) data); 9 | endinterface 10 | 11 | function Bit#(cntsz) wrapinc(Bit#(cntsz) idx, Integer cnt); 12 | Bit#(TAdd#(1,cntsz)) p1 = zeroExtend(idx) + 1; 13 | if ( p1 >= fromInteger(cnt) ) p1 = 0; 14 | return truncate(p1); 15 | endfunction 16 | 17 | module mkScoreboard(ScoreboardIfc#(cnt)) 18 | provisos(Log#(cnt, cntsz)); 19 | Integer icnt = valueOf(cnt); 20 | 21 | Vector#(cnt, Reg#(Bit#(5))) datav <- replicateM(mkReg(0)); 22 | Reg#(Bit#(cntsz)) enqoff <- mkReg(0); 23 | Reg#(Bit#(cntsz)) deqoff <- mkReg(0); 24 | 25 | Wire#(Bit#(1)) enqreq <- mkDWire(0); 26 | Wire#(Bit#(5)) enqdata <- mkDWire(0); 27 | Wire#(Bit#(1)) deqreq <- mkDWire(0); 28 | 29 | function Bool searchv(Bit#(5) q); 30 | Bool ret = False; 31 | if ( q != 0 ) begin 32 | for (Integer i = 0; i < icnt; i=i+1) begin 33 | if ( datav[i] == q ) begin 34 | if ( enqoff > deqoff && fromInteger(i) < enqoff && fromInteger(i) >= deqoff ) ret = True; 35 | if ( enqoff < deqoff && (fromInteger(i) < enqoff || fromInteger(i) >= deqoff) ) ret = True; 36 | end 37 | end 38 | end 39 | return ret; 40 | endfunction 41 | 42 | rule normalize; 43 | let doff = deqoff; 44 | let eoff = enqoff; 45 | if ( deqreq != 0 ) begin 46 | doff = wrapinc(deqoff, icnt); 47 | deqoff <= doff; 48 | //$display( "- %x %x -- %x", eoff, doff, datav[deqoff] ); 49 | end 50 | if ( enqreq != 0 ) begin 51 | eoff = wrapinc(enqoff, icnt); 52 | enqoff <= eoff; 53 | datav[enqoff] <= enqdata; 54 | //$display( "+ %x %x -- %x", eoff, doff, enqdata ); 55 | end 56 | //for ( Integer i = 0; i < icnt; i=i+1 ) $write ( "%x ", datav[i] ); 57 | //$display(""); 58 | endrule 59 | 60 | method Action enq(Bit#(5) data) if ( deqoff != wrapinc(enqoff, icnt) ); 61 | enqreq <= 1; 62 | enqdata <= data; 63 | endmethod 64 | method Action deq if ( deqoff != enqoff ); 65 | deqreq <= 1; 66 | endmethod 67 | method Bool search1(Bit#(5) data); 68 | return searchv(data); 69 | endmethod 70 | method Bool search2(Bit#(5) data); 71 | return searchv(data); 72 | endmethod 73 | endmodule 74 | 75 | -------------------------------------------------------------------------------- /projects/rv32i/sw/minisudoku.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char volatile * const printchar = (char*)0xfff; 5 | char setin[16] = {0,3,0,4,0,0,2,0,4,0,3,0,0,0,0,2}; 6 | 7 | 8 | bool check(char* set, int idx) { 9 | int row = idx/4; 10 | int col = idx%4; 11 | int blk = ((idx/8)*8)+(col/2)*2; 12 | bool unique[12]; 13 | for ( int i = 0; i < 12; i++ ) unique[i] = false; 14 | 15 | bool ret = true; 16 | for ( int i = 0; i < 4; i++ ) { 17 | char r = set[row*4+i]; 18 | char c = set[i*4+col]; 19 | char b = set[blk+(i/2)*4+(i%2)]; 20 | //printf( "row: %d col: %d blk: %d\n", r,c,b ); 21 | 22 | if ( r > 0 ) { 23 | if ( unique[r-1] ) ret = false; 24 | unique[r-1] = true; 25 | } 26 | if ( c > 0 ) { 27 | if ( unique[c+4-1] ) ret = false; 28 | unique[c+4-1] = true; 29 | } 30 | if ( b > 0 ) { 31 | if ( unique[b+8-1] ) ret = false; 32 | unique[b+8-1] = true; 33 | } 34 | } 35 | //printf( "check %s %x\n", ret?"true":"false", mask ); 36 | return ret; 37 | } 38 | bool solve(char* set, int idx) { 39 | if ( idx >= 16 ) return true; 40 | 41 | if ( set[idx] > 0 ) { 42 | return solve(set, idx+1); 43 | } else { 44 | for ( int i = 0; i < 4; i++ ) { 45 | set[idx] = i+1; 46 | bool r = check(set, idx); 47 | 48 | if (r) { 49 | if ( solve(set, idx+1) ) return true; 50 | } 51 | } 52 | set[idx] = 0; 53 | return false; 54 | } 55 | } 56 | 57 | int 58 | main() { 59 | //char set[16] = {0,4,3,0,0,0,4,2,0,2,0,0,3,0,0,0}; 60 | //char set[16] = {0,0,3,0,0,4,0,2,0,0,2,0,0,2,0,3}; 61 | //printf("input:\n"); 62 | /* 63 | for ( int i = 0; i < 4; i++ ) { 64 | for ( int j = 0; j < 4; j++ ) { 65 | printf( "%d ", set[i*4+j] ); 66 | } 67 | printf( "\n" ); 68 | } 69 | 70 | printf( "\n" ); 71 | */ 72 | for ( int i = 0; i < 16; i++ ) { 73 | (*printchar) = (setin[i]+0x30); 74 | if ( i % 4 == 3 ) (*printchar) = 0xa; 75 | } 76 | (*printchar) = 0xa; 77 | 78 | if ( solve(setin, 0) ) { 79 | for ( int i = 0; i < 16; i++ ) { 80 | (*printchar) = (setin[i]+0x30); 81 | if ( i % 4 == 3 ) (*printchar) = 0xa; 82 | } 83 | int mul =1; 84 | for ( int i = 0; i < 16; i++ ) { 85 | int v = setin[i]; 86 | asm ("mul %0, %1, %2" : "=r"(mul) : "r"(mul), "r" (v)); 87 | } 88 | (*printchar) = 0xa; 89 | (*printchar) = (mul&0x7)+0x30; 90 | 91 | } else { 92 | (*printchar) = 0x78; 93 | (*printchar) = 0xa; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /src/BRAMSubWord.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import Vector::*; 3 | import BRAM::*; 4 | 5 | interface BRAMSubWord4Ifc#(numeric type addrsz); 6 | // "bytes": 0 = 1 byte, 3 = 4 bytes 7 | method Action req(Bit#(addrsz) addr, Bit#(32) word, Bit#(2) bytes, Bool write); 8 | method ActionValue#(Bit#(32)) resp; 9 | endinterface 10 | 11 | function Bit#(32) genMask(Bit#(2) off, Bit#(2) bytes); // bytes starts at one, so 0 -> 1 byte 12 | Vector#(4,Bit#(8)) mask; 13 | for ( Integer i = 0; i < 4; i=i+1 ) begin 14 | if ( fromInteger(i) >= off && fromInteger(i) <= off+bytes ) mask[i] = 8'hff; 15 | else mask[i] = 0; 16 | end 17 | return {mask[3],mask[2],mask[1],mask[0]}; 18 | endfunction 19 | 20 | module mkBRAMSubWord (BRAMSubWord4Ifc#(asz)) 21 | provisos( 22 | Add#(sasz,2,asz) 23 | ); 24 | BRAM2Port#(Bit#(sasz),Bit#(32)) mem <- mkBRAM2Server(defaultValue); 25 | FIFO#(Tuple4#(Bit#(asz),Bit#(32),Bit#(2),Bool)) reqQ <- mkFIFO; 26 | FIFO#(Bit#(2)) readOffsetQ <- mkFIFO; 27 | 28 | rule doWrite; 29 | let r = reqQ.first; 30 | reqQ.deq; 31 | let d <- mem.portA.response.get; 32 | let addr = tpl_1(r); 33 | let data = tpl_2(r); 34 | let bytes = tpl_3(r); 35 | let write = tpl_4(r); 36 | if (!write) begin 37 | 38 | mem.portB.request.put(BRAMRequest{write:False,responseOnWrite:False,address:truncate(addr>>2),datain:?}); 39 | //$write( "Reading %x\n", addr>>2 ); 40 | readOffsetQ.enq(truncate(addr)); 41 | end else begin 42 | Bit#(2) woff = truncate(addr); 43 | Bit#(5) woffe = zeroExtend(woff); 44 | Bit#(32) wdat = (data<<(8*woffe)); 45 | Bit#(32) mask = genMask(woff, bytes); 46 | Bit#(32) odat = d & (~mask); 47 | Bit#(32) ndat = wdat | odat; 48 | mem.portB.request.put(BRAMRequest{write:True,responseOnWrite:False,address:truncate(addr>>2),datain:ndat}); 49 | //$write( "Writing %x %x %x %x %x\n", addr>>2, d, data, ndat, mask ); 50 | end 51 | endrule 52 | 53 | 54 | method Action req(Bit#(asz) addr, Bit#(32) word, Bit#(2) bytes, Bool write); 55 | reqQ.enq(tuple4(addr,word,bytes,write)); 56 | mem.portA.request.put(BRAMRequest{write:False,responseOnWrite:False,address:truncate(addr>>2),datain:?}); 57 | endmethod 58 | method ActionValue#(Bit#(32)) resp; 59 | let d <- mem.portB.response.get; 60 | Bit#(5) so = zeroExtend(readOffsetQ.first); 61 | readOffsetQ.deq; 62 | //$write("!~~ %x %x\n", d, so); 63 | return (d>>(so*8)); 64 | endmethod 65 | endmodule 66 | -------------------------------------------------------------------------------- /src/Top.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import Uart::*; 5 | import BRAMSubWord::*; 6 | import PLL::*; 7 | import Sdram::*; 8 | 9 | import HwMain::*; 10 | 11 | 12 | interface TopIfc; 13 | (* always_ready *) 14 | method Bit#(1) ftdi_rxd; 15 | (* always_enabled, always_ready, prefix = "", result = "serial_txd" *) 16 | method Action ftdi_tx(Bit#(1) ftdi_txd); 17 | (* always_enabled, always_ready, prefix = "", result = "led" *) 18 | method Bit#(8) led; 19 | 20 | `ifndef BSIM 21 | (* always_ready, prefix = "" *) 22 | interface Ulx3sSdramPinsIfc sdram_pins; 23 | `endif 24 | endinterface 25 | 26 | (* no_default_clock, no_default_reset*) 27 | module mkTop#(Clock clk_25mhz)(TopIfc); 28 | PLLIfc pll <- mkPllFast(clk_25mhz); 29 | Reset rst_target = pll.rst_100mhz; 30 | Clock clk_target = pll.clk_100mhz; 31 | 32 | Reset rst_null = noReset(); 33 | UartIfc uart <- mkUart(217, clocked_by clk_25mhz, reset_by rst_null); //115200 baud on 25 mhz 34 | SyncFIFOIfc#(Bit#(8)) serialToMainQ <- mkSyncFIFO(4,clk_25mhz, rst_null, clk_target); 35 | SyncFIFOIfc#(Bit#(8)) mainToSerialQ <- mkSyncFIFO(4,clk_target, rst_target, clk_25mhz); 36 | 37 | Ulx3sSdramIfc mem <- mkUlx3sSdram(pll.clk_100mhz, 100, clocked_by clk_target, reset_by rst_target); 38 | 39 | HwMainIfc main <- mkHwMain(mem.user, clocked_by clk_target, reset_by rst_target); 40 | 41 | Reg#(Bit#(8)) uartInWord <- mkReg(0, clocked_by clk_25mhz, reset_by rst_null); 42 | rule relayUartIn; 43 | Bit#(8) d <- uart.user.get; 44 | serialToMainQ.enq(d); 45 | uartInWord <= d; 46 | endrule 47 | rule relayUartIn2; 48 | serialToMainQ.deq; 49 | main.serial_rx(serialToMainQ.first); 50 | endrule 51 | 52 | rule relayUartOut; 53 | let d <- main.serial_tx; 54 | mainToSerialQ.enq(d); 55 | endrule 56 | rule relayUartOut2; 57 | mainToSerialQ.deq; 58 | uart.user.send(mainToSerialQ.first); 59 | endrule 60 | 61 | 62 | 63 | method Bit#(1) ftdi_rxd; 64 | return uart.serial_txd; 65 | endmethod 66 | method Action ftdi_tx(Bit#(1) ftdi_txd); 67 | uart.serial_rx(ftdi_txd); 68 | endmethod 69 | `ifndef BSIM 70 | interface sdram_pins = mem.pins; 71 | `endif 72 | method Bit#(8) led; 73 | return uartInWord; 74 | endmethod 75 | endmodule 76 | 77 | module mkTop_bsim(Empty); 78 | Clock curclk <- exposeCurrentClock; 79 | Ulx3sSdramIfc mem <- mkUlx3sSdram(curclk, 100); 80 | HwMainIfc main <- mkHwMain(mem.user); 81 | UartUserIfc uart <- mkUart_bsim; 82 | rule relayUartIn; 83 | let d <- uart.get; 84 | main.serial_rx(d); 85 | endrule 86 | rule relayUartOut; 87 | let d <- main.serial_tx; 88 | //$write( "uart sending serial\n" ); 89 | uart.send(d); 90 | endrule 91 | endmodule 92 | -------------------------------------------------------------------------------- /projects/rv32i/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | pthread_mutex_t g_mutex; 10 | pthread_t g_thread; 11 | std::queue sw2hwq; 12 | std::queue hw2swq; 13 | int swmain(); 14 | 15 | bool g_init_done = false; 16 | void init() { 17 | if ( g_init_done ) return; 18 | pthread_mutex_init(&g_mutex, NULL); 19 | pthread_create(&g_thread, NULL, swmain, NULL); 20 | 21 | g_init_done = true; 22 | } 23 | 24 | uint8_t g_outidx = 0xff; 25 | extern "C" uint32_t bdpiUartGet(uint8_t idx) { 26 | init(); 27 | 28 | uint32_t data = 0xffffffff; 29 | pthread_mutex_lock(&g_mutex); 30 | if ( idx != g_outidx && !sw2hwq.empty() ) { 31 | // get new data 32 | data = sw2hwq.front(); 33 | sw2hwq.pop(); 34 | //printf( "uart get %d %d %d -> %x\n", idx, g_outidx, sw2hwq.size(), data&0xff ); 35 | g_outidx = ((int)g_outidx+1)&0xff; 36 | } 37 | pthread_mutex_unlock(&g_mutex); 38 | return data; 39 | } 40 | 41 | uint8_t g_inidx = 0xff; 42 | extern "C" void bdpiUartPut(uint32_t d) { 43 | init(); 44 | 45 | uint8_t idx = 0xff&(d>>8); 46 | uint8_t data = 0xff&d; 47 | if ( idx != g_inidx ) { 48 | g_inidx = idx; 49 | //printf( "--%d, %d\n",idx, data ); 50 | pthread_mutex_lock(&g_mutex); 51 | hw2swq.push(data); 52 | pthread_mutex_unlock(&g_mutex); 53 | } 54 | } 55 | 56 | uint32_t uart_recv() { 57 | init(); 58 | uint32_t r = 0xffffffff; 59 | 60 | pthread_mutex_lock(&g_mutex); 61 | if ( !hw2swq.empty() ) { 62 | r = hw2swq.front(); 63 | hw2swq.pop(); 64 | } 65 | pthread_mutex_unlock(&g_mutex); 66 | return r; 67 | } 68 | void uart_send(uint8_t data) { 69 | init(); 70 | 71 | pthread_mutex_lock(&g_mutex); 72 | sw2hwq.push(data); 73 | pthread_mutex_unlock(&g_mutex); 74 | } 75 | 76 | 77 | int swmain() { 78 | FILE* bin = fopen("sw/minisudoku.bin", "rb"); 79 | int byteoff = 0; 80 | while(!feof(bin)) { 81 | uint8_t din; 82 | if ( !fread(&din, 1, 1, bin) ) continue; 83 | if ( byteoff < 4096 ) { 84 | uart_send(0); //imem write 85 | } else { 86 | uart_send(2); //dmem write 'b010 87 | //printf( "Loading %x to %d\n", din, byteoff ); 88 | } 89 | uart_send(din); 90 | byteoff ++; 91 | } 92 | printf( "sent all data %d\n", byteoff ); fflush(stdout); 93 | uart_send(1); // start processor 94 | uart_send(1); // start processor 95 | 96 | while(true) { 97 | uint32_t c = uart_recv(); 98 | if ( c > 0xff ) continue; 99 | uint8_t cr = c; 100 | 101 | //fprintf(stderr, "<%c:0x%x>", cr, cr ); 102 | fprintf(stderr, "%c", cr ); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /projects/rv32i/sw/microbench.dump: -------------------------------------------------------------------------------- 1 | 2 | ././obj//binary.elf: file format elf32-littleriscv 3 | 4 | 5 | Disassembly of section .text: 6 | 7 | 00000000 : 8 | 0: 00200293 addi x5,x0,2 9 | 4: 00300313 addi x6,x0,3 10 | 8: 00700393 addi x7,x0,7 11 | c: 00000013 addi x0,x0,0 12 | 10: 00000013 addi x0,x0,0 13 | 14: 00000013 addi x0,x0,0 14 | 18: 00000013 addi x0,x0,0 15 | 1c: 00000013 addi x0,x0,0 16 | 20: 026282b3 mul x5,x5,x6 17 | 24: 00000013 addi x0,x0,0 18 | 28: 00000013 addi x0,x0,0 19 | 2c: 00000013 addi x0,x0,0 20 | 30: 00000013 addi x0,x0,0 21 | 34: 00000013 addi x0,x0,0 22 | 38: 00638333 add x6,x7,x6 23 | 3c: 00000013 addi x0,x0,0 24 | 40: 00000013 addi x0,x0,0 25 | 44: 00000013 addi x0,x0,0 26 | 48: 00000013 addi x0,x0,0 27 | 4c: 00000013 addi x0,x0,0 28 | 50: 026282b3 mul x5,x5,x6 29 | 54: 00000013 addi x0,x0,0 30 | 58: 00000013 addi x0,x0,0 31 | 5c: 00000013 addi x0,x0,0 32 | 60: 00000013 addi x0,x0,0 33 | 64: 00000013 addi x0,x0,0 34 | 68: 40000e13 addi x28,x0,1024 35 | 6c: 00000013 addi x0,x0,0 36 | 70: 00000013 addi x0,x0,0 37 | 74: 00000013 addi x0,x0,0 38 | 78: 00000013 addi x0,x0,0 39 | 7c: 00000013 addi x0,x0,0 40 | 80: 005e2023 sw x5,0(x28) 41 | 42 | 00000084 : 43 | 84: 40100293 addi x5,x0,1025 44 | 88: 00300313 addi x6,x0,3 45 | 8c: 00300393 addi x7,x0,3 46 | 90: 00000e13 addi x28,x0,0 47 | 94: 00000013 addi x0,x0,0 48 | 98: 00000013 addi x0,x0,0 49 | 9c: 00000013 addi x0,x0,0 50 | a0: 00000013 addi x0,x0,0 51 | a4: 00000013 addi x0,x0,0 52 | a8: 00000013 addi x0,x0,0 53 | ac: 00730663 beq x6,x7,b8 54 | b0: 01c2a023 sw x28,0(x5) 55 | b4: 0080006f jal x0,bc 56 | 57 | 000000b8 : 58 | b8: 0072a023 sw x7,0(x5) 59 | 60 | 000000bc : 61 | bc: 00200293 addi x5,x0,2 62 | c0: 00700313 addi x6,x0,7 63 | c4: 01100393 addi x7,x0,17 64 | c8: 00500e13 addi x28,x0,5 65 | cc: ffc38393 addi x7,x7,-4 66 | d0: 027303b3 mul x7,x6,x7 67 | d4: 005e02b3 add x5,x28,x5 68 | d8: 405382b3 sub x5,x7,x5 69 | dc: 40200e13 addi x28,x0,1026 70 | e0: 005e2023 sw x5,0(x28) 71 | e4: 40300313 addi x6,x0,1027 72 | e8: 00001297 auipc x5,0x1 73 | ec: f1828293 addi x5,x5,-232 # 1000 74 | f0: 0002a483 lw x9,0(x5) 75 | f4: 0042a903 lw x18,4(x5) 76 | f8: 012489b3 add x19,x9,x18 77 | fc: 01332023 sw x19,0(x6) 78 | 100: c0001073 unimp 79 | -------------------------------------------------------------------------------- /src/Mult18x18D.bsv: -------------------------------------------------------------------------------- 1 | package Mult18x18D; 2 | 3 | import FIFO::*; 4 | import FIFOF::*; 5 | 6 | interface Mult18x18DImportIfc; 7 | method Action puta(Bit#(18) puta); 8 | method Action putb(Bit#(18) putb); 9 | method Bit#(36) dataout; 10 | endinterface 11 | 12 | import "BVI" mult18x18d = 13 | module mkMult18x18DImport#(Clock clk, Reset rstn) (Mult18x18DImportIfc); 14 | default_clock no_clock; 15 | default_reset no_reset; 16 | 17 | input_clock (clk) = clk; 18 | input_reset (rstn) = rstn; 19 | 20 | method dataout dataout; 21 | method puta(dataax) enable((*inhigh*) dataax_EN) reset_by(no_reset) clocked_by(clk); 22 | method putb(dataay) enable((*inhigh*) dataay_EN) reset_by(no_reset) clocked_by(clk); 23 | schedule ( 24 | dataout, puta, putb 25 | ) CF ( 26 | dataout, puta, putb 27 | ); 28 | endmodule 29 | 30 | interface Mult18x18DBSIMIfc; 31 | method Action puta(Bit#(18) puta); 32 | method Action putb(Bit#(18) putb); 33 | method ActionValue#(Bit#(36)) dataout; 34 | endinterface 35 | module mkMult18x18DBSIM(Mult18x18DBSIMIfc); 36 | FIFO#(Bit#(18)) aQ <- mkSizedFIFO(4); 37 | FIFO#(Bit#(18)) bQ <- mkSizedFIFO(4); 38 | method Action puta(Bit#(18) a); 39 | aQ.enq(a); 40 | endmethod 41 | method Action putb(Bit#(18) b); 42 | bQ.enq(b); 43 | endmethod 44 | method ActionValue#(Bit#(36)) dataout; 45 | aQ.deq; 46 | bQ.deq; 47 | return zeroExtend(aQ.first)*zeroExtend(bQ.first); 48 | endmethod 49 | endmodule 50 | 51 | interface Mult18x18DIfc; 52 | method Action put(Bit#(18) a, Bit#(18) b); 53 | method ActionValue#(Bit#(36)) get; 54 | endinterface 55 | 56 | 57 | module mkMult18x18D(Mult18x18DIfc); 58 | Clock curclk <- exposeCurrentClock; 59 | Reset currst <- exposeCurrentReset; 60 | `ifdef BSIM 61 | Mult18x18DBSIMIfc multin <- mkMult18x18DBSIM(); 62 | `else 63 | Mult18x18DImportIfc multin <- mkMult18x18DImport(curclk, currst); 64 | `endif 65 | FIFOF#(Bit#(36)) outQ <- mkSizedFIFOF(4); 66 | Wire#(Bit#(1)) validWire <- mkDWire(0); 67 | Reg#(Bit#(4)) validMap <- mkReg(0); 68 | Wire#(Bit#(18)) wireA <- mkDWire(0); 69 | Wire#(Bit#(18)) wireB <- mkDWire(0); 70 | rule enqValid; 71 | 72 | `ifdef BSIM 73 | if ( validWire != 0 ) begin 74 | multin.puta(wireA); 75 | multin.putb(wireB); 76 | end 77 | `else 78 | multin.puta(wireA); 79 | multin.putb(wireB); 80 | `endif 81 | 82 | validMap <= (validMap<<1)|zeroExtend(validWire); 83 | if ( validMap[2] == 1 ) begin 84 | `ifdef BSIM 85 | let outd <- multin.dataout; 86 | `else 87 | let outd = multin.dataout; 88 | `endif 89 | if( outQ.notFull ) outQ.enq(outd); 90 | end 91 | endrule 92 | 93 | 94 | Reg#(Bit#(3)) dataInFlightUp <- mkReg(0); 95 | Reg#(Bit#(3)) dataInFlightDn <- mkReg(0); 96 | method Action put(Bit#(18) a, Bit#(18) b) if ( dataInFlightUp-dataInFlightDn < 5 ); 97 | wireA <= a; 98 | wireB <= b; 99 | validWire <= 1; 100 | dataInFlightUp <= dataInFlightUp + 1; 101 | endmethod 102 | method ActionValue#(Bit#(36)) get; 103 | outQ.deq; 104 | dataInFlightDn <= dataInFlightDn + 1; 105 | return outQ.first; 106 | endmethod 107 | endmodule 108 | 109 | endpackage: Mult18x18D 110 | -------------------------------------------------------------------------------- /src/bsv_verilog/SyncResetA.v: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2000-2012 Bluespec, Inc. 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and this permission notice shall be included in 12 | // all copies or substantial portions of the Software. 13 | 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | // THE SOFTWARE. 21 | // 22 | // $Revision: 29452 $ 23 | // $Date: 2012-08-27 22:01:48 +0000 (Mon, 27 Aug 2012) $ 24 | 25 | `ifdef BSV_ASSIGNMENT_DELAY 26 | `else 27 | `define BSV_ASSIGNMENT_DELAY 28 | `endif 29 | 30 | `ifdef BSV_POSITIVE_RESET 31 | `define BSV_RESET_VALUE 1'b1 32 | `define BSV_RESET_EDGE posedge 33 | `else 34 | `define BSV_RESET_VALUE 1'b0 35 | `define BSV_RESET_EDGE negedge 36 | `endif 37 | 38 | 39 | 40 | // A synchronization module for resets. Output resets are held for 41 | // RSTDELAY+1 cycles, RSTDELAY >= 0. Reset assertion is asynchronous, 42 | // while deassertion is synchronized to the clock. 43 | module SyncResetA ( 44 | IN_RST, 45 | CLK, 46 | OUT_RST 47 | ); 48 | 49 | parameter RSTDELAY = 1 ; // Width of reset shift reg 50 | 51 | input CLK ; 52 | input IN_RST ; 53 | output OUT_RST ; 54 | 55 | reg [RSTDELAY:0] reset_hold ; 56 | wire [RSTDELAY+1:0] next_reset = {reset_hold, ~ `BSV_RESET_VALUE} ; 57 | 58 | assign OUT_RST = reset_hold[RSTDELAY] ; 59 | 60 | always @( posedge CLK or `BSV_RESET_EDGE IN_RST ) 61 | begin 62 | if (IN_RST == `BSV_RESET_VALUE) 63 | begin 64 | reset_hold <= `BSV_ASSIGNMENT_DELAY {RSTDELAY+1 {`BSV_RESET_VALUE}} ; 65 | end 66 | else 67 | begin 68 | reset_hold <= `BSV_ASSIGNMENT_DELAY next_reset[RSTDELAY:0]; 69 | end 70 | end // always @ ( posedge CLK or `BSV_RESET_EDGE IN_RST ) 71 | 72 | `ifdef BSV_NO_INITIAL_BLOCKS 73 | `else // not BSV_NO_INITIAL_BLOCKS 74 | // synopsys translate_off 75 | initial 76 | begin 77 | #0 ; 78 | // initialize out of reset forcing the designer to do one 79 | reset_hold = {(RSTDELAY + 1) {~ `BSV_RESET_VALUE}} ; 80 | end 81 | // synopsys translate_on 82 | `endif // BSV_NO_INITIAL_BLOCKS 83 | 84 | endmodule // SyncResetA 85 | -------------------------------------------------------------------------------- /src/bsv_verilog/MakeResetA.v: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2000-2012 Bluespec, Inc. 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and this permission notice shall be included in 12 | // all copies or substantial portions of the Software. 13 | 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | // THE SOFTWARE. 21 | // 22 | // $Revision: 29452 $ 23 | // $Date: 2012-08-27 22:01:48 +0000 (Mon, 27 Aug 2012) $ 24 | 25 | `ifdef BSV_ASSIGNMENT_DELAY 26 | `else 27 | `define BSV_ASSIGNMENT_DELAY 28 | `endif 29 | 30 | `ifdef BSV_POSITIVE_RESET 31 | `define BSV_RESET_VALUE 1'b1 32 | `define BSV_RESET_EDGE posedge 33 | `else 34 | `define BSV_RESET_VALUE 1'b0 35 | `define BSV_RESET_EDGE negedge 36 | `endif 37 | 38 | 39 | 40 | module MakeResetA ( 41 | CLK, 42 | RST, 43 | ASSERT_IN, 44 | ASSERT_OUT, 45 | 46 | DST_CLK, 47 | OUT_RST 48 | ); 49 | 50 | parameter RSTDELAY = 2 ; // Width of reset shift reg 51 | parameter init = 1 ; 52 | 53 | input CLK ; 54 | input RST ; 55 | input ASSERT_IN ; 56 | output ASSERT_OUT ; 57 | 58 | input DST_CLK ; 59 | output OUT_RST ; 60 | 61 | reg rst ; 62 | wire OUT_RST ; 63 | 64 | assign ASSERT_OUT = rst == `BSV_RESET_VALUE ; 65 | 66 | SyncResetA #(RSTDELAY) rstSync (.CLK(DST_CLK), 67 | .IN_RST(rst), 68 | .OUT_RST(OUT_RST)); 69 | 70 | always@(posedge CLK or `BSV_RESET_EDGE RST) begin 71 | if (RST == `BSV_RESET_VALUE) 72 | rst <= `BSV_ASSIGNMENT_DELAY init ? ~ `BSV_RESET_VALUE : `BSV_RESET_VALUE ; 73 | else 74 | begin 75 | if (ASSERT_IN) 76 | rst <= `BSV_ASSIGNMENT_DELAY `BSV_RESET_VALUE; 77 | else // if (rst == 1'b0) 78 | rst <= `BSV_ASSIGNMENT_DELAY ~ `BSV_RESET_VALUE; 79 | end // else: !if(RST == `BSV_RESET_VALUE) 80 | end // always@ (posedge CLK or `BSV_RESET_EDGE RST) 81 | 82 | `ifdef BSV_NO_INITIAL_BLOCKS 83 | `else // not BSV_NO_INITIAL_BLOCKS 84 | // synopsys translate_off 85 | initial begin 86 | #0 ; 87 | rst = ~ `BSV_RESET_VALUE ; 88 | end 89 | // synopsys translate_on 90 | `endif // BSV_NO_INITIAL_BLOCKS 91 | 92 | endmodule // MakeResetA 93 | -------------------------------------------------------------------------------- /projects/nn_fc/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "../../../src/cpp/ttyifc.h" 10 | #include "nn_fc.h" 11 | 12 | typedef union { 13 | float f; 14 | uint8_t c[4]; 15 | } FloatBit8; 16 | 17 | void send_weight(float data) { 18 | uart_send(0xff); 19 | 20 | FloatBit8 b; 21 | b.f = data; 22 | uart_send(b.c[0]); 23 | uart_send(b.c[1]); 24 | uart_send(b.c[2]); 25 | uart_send(b.c[3]); 26 | } 27 | 28 | void send_input(float value, int input_idx) { 29 | uart_send(input_idx&0xff); 30 | 31 | FloatBit8 b; 32 | b.f = value; 33 | uart_send(b.c[0]); 34 | uart_send(b.c[1]); 35 | uart_send(b.c[2]); 36 | uart_send(b.c[3]); 37 | } 38 | 39 | FC_Result recv_result() { 40 | FC_Result r; 41 | r.value = 0; 42 | r.input_idx = 0; 43 | r.output_idx = 0; 44 | r.valid = false; 45 | uint32_t res = uart_recv(); 46 | if ( res > 0xff ) return r; 47 | r.input_idx = res; 48 | r.valid = true; 49 | 50 | 51 | res = uart_recv(); 52 | while ( res > 0xff ) res = uart_recv(); 53 | r.output_idx = res; 54 | 55 | 56 | FloatBit8 b; 57 | for ( int i = 0; i < 4; i++ ) { 58 | uint32_t res = uart_recv(); 59 | while ( res > 0xff ) res = uart_recv(); 60 | b.c[i] = res; 61 | } 62 | r.value = b.f; 63 | 64 | 65 | 66 | return r; 67 | } 68 | 69 | 70 | void* swmain(void* param) { 71 | srand(time(NULL)); 72 | int input_cnt = 64; 73 | int output_dim = 64; 74 | int input_dim = 1024; 75 | float* weights = (float*)malloc(sizeof(float)*input_dim*output_dim); 76 | for ( int i = 0; i < input_dim*output_dim; i++ ) { 77 | weights[i] = 0; 78 | if ( rand()%4 == 0 ) { 79 | weights[i] = ((float)(rand()%10000))/1000; 80 | } 81 | } 82 | float* inputs = (float*)malloc(sizeof(float)*input_dim*input_cnt); 83 | for ( int i = 0; i < input_dim*input_cnt; i++ ) { 84 | inputs[i] = 0; 85 | if ( rand()%4 == 0 ) { 86 | inputs[i] = ((float)(rand()%10000))/1000; 87 | } 88 | } 89 | float* answer = (float*)malloc(sizeof(float)*output_dim*input_cnt); 90 | float* answergolden = (float*)malloc(sizeof(float)*output_dim*input_cnt); 91 | for ( int i = 0; i < input_cnt; i++ ) { 92 | for ( int j = 0; j < output_dim; j++ ) { 93 | answergolden[i*output_dim+j] = 0; 94 | for ( int k = 0; k < input_dim; k++ ) { 95 | answergolden[i*output_dim+j] += weights[j*input_dim+k]*inputs[i*input_dim+k]; 96 | } 97 | } 98 | } 99 | nn_fc(weights, inputs, input_cnt, input_dim, output_dim, answer); 100 | printf( "Compute done!" ); 101 | fflush(stdout); 102 | 103 | printf( "Comparing results...\n" ); 104 | float diffsum = 0; 105 | for ( int i = 0; i < input_cnt*output_dim; i++ ) { 106 | float diff = answer[i] - answergolden[i]; 107 | if ( diff < 0 ) diff = -diff; 108 | if ( diff > 1 ) { 109 | printf( "Error larger than 1 at %d! %f (%f vs %f)\n", i, diff, answer[i], answergolden[i] ); 110 | } 111 | diffsum += diff; 112 | } 113 | printf( "Compare done! Average diff %f\n", diffsum/(input_cnt*output_dim) ); 114 | fflush(stdout); 115 | 116 | 117 | exit(0); 118 | return NULL; 119 | } 120 | 121 | int 122 | main() { 123 | int ret = open_tty("/dev/ttyUSB0"); 124 | if ( ret ) return ret; 125 | 126 | swmain(NULL); 127 | } 128 | -------------------------------------------------------------------------------- /projects/test/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import Uart::*; 5 | import BRAMSubWord::*; 6 | import PLL::*; 7 | import Sdram::*; 8 | 9 | import Mult18x18D::*; 10 | import SimpleFloat::*; 11 | 12 | interface HwMainIfc; 13 | method ActionValue#(Bit#(8)) serial_tx; 14 | method Action serial_rx(Bit#(8) rx); 15 | 16 | endinterface 17 | 18 | module mkHwMain#(Ulx3sSdramUserIfc mem) (HwMainIfc); 19 | Clock curclk <- exposeCurrentClock; 20 | Reset currst <- exposeCurrentReset; 21 | 22 | FIFO#(Bit#(8)) serialrxQ <- mkFIFO; 23 | FIFO#(Bit#(8)) serialtxQ <- mkFIFO; 24 | 25 | 26 | Reg#(Bit#(64)) sdramCmd <- mkReg(0); 27 | Reg#(Bit#(8)) sdramCmdCnt <- mkReg(0); 28 | rule procUartIn; 29 | Bit#(8) charin = serialrxQ.first(); 30 | serialrxQ.deq; 31 | 32 | let nd = (sdramCmd<<8)|zeroExtend(charin); 33 | if ( sdramCmdCnt + 1 >= 6 ) begin 34 | sdramCmdCnt <= 0; 35 | sdramCmd <= 0; 36 | Bit#(24) addr = truncate(nd); 37 | Bit#(16) data = truncate(nd>>24); 38 | Bool isWrite = (nd[40] == 1); 39 | mem.req(addr,data,isWrite); 40 | //$write("Req addr %x data %x write %s\n", addr, data, isWrite?"yes":"no"); 41 | end else begin 42 | sdramCmd <= nd; 43 | sdramCmdCnt <= sdramCmdCnt + 1; 44 | end 45 | endrule 46 | Reg#(Bit#(8)) sdramReadOutBuf <- mkReg(0); 47 | Reg#(Bool) sdramReadOutBuffered <- mkReg(False); 48 | rule relaySdramread; 49 | if ( sdramReadOutBuffered ) begin 50 | serialtxQ.enq(sdramReadOutBuf); 51 | sdramReadOutBuffered <= False; 52 | end else begin 53 | let d <- mem.readResp; 54 | serialtxQ.enq(truncate(d>>8)); 55 | sdramReadOutBuf <= truncate(d); 56 | sdramReadOutBuffered <= True; 57 | end 58 | endrule 59 | 60 | 61 | 62 | /* 63 | Reg#(Bit#(96)) floatInBuffer <- mkReg(0); 64 | Reg#(Bit#(4)) floatInCnt <- mkReg(0); 65 | FIFO#(Bit#(32)) addQ <- mkFIFO; 66 | FloatTwoOp fmult <- mkFloatMult; 67 | FloatTwoOp fadd <- mkFloatAdd; 68 | 69 | rule procUartIn; 70 | Bit#(8) charin = serialrxQ.first(); 71 | serialrxQ.deq; 72 | 73 | let nd = (floatInBuffer<<8)|zeroExtend(charin); 74 | if ( floatInCnt == 11 ) begin 75 | fmult.put(unpack(truncate(nd)), unpack(truncate(nd>>32))); 76 | addQ.enq(truncate(nd>>64)); 77 | floatInCnt <= 0; 78 | floatInBuffer <= 0; 79 | end else begin 80 | floatInCnt <= floatInCnt + 1; 81 | floatInBuffer <= nd; 82 | end 83 | endrule 84 | rule procAdd; 85 | let nd_ <- fmult.get; 86 | addQ.deq; 87 | fadd.put(nd_, unpack(addQ.first)); 88 | endrule 89 | 90 | Reg#(Bit#(32)) floatOut <- mkReg(0); 91 | Reg#(Bit#(2)) floatOutCnt <- mkReg(0); 92 | rule outputCnt; 93 | if ( floatOutCnt == 0 ) begin 94 | let nd_ <- fadd.get; 95 | //let nd_ <- fmult.get; 96 | Bit#(32) nd = pack(nd_); 97 | floatOut <= {nd[23:0],0}; 98 | serialtxQ.enq(nd[31:24]); 99 | floatOutCnt <= 3; 100 | end else begin 101 | floatOutCnt <= floatOutCnt - 1; 102 | Bit#(32) nd = floatOut; 103 | floatOut <= {nd[23:0],0}; 104 | serialtxQ.enq(nd[31:24]); 105 | end 106 | endrule 107 | */ 108 | 109 | method ActionValue#(Bit#(8)) serial_tx; 110 | serialtxQ.deq; 111 | return serialtxQ.first(); 112 | endmethod 113 | method Action serial_rx(Bit#(8) d); 114 | serialrxQ.enq(d); 115 | endmethod 116 | endmodule 117 | -------------------------------------------------------------------------------- /src/cpp/ttyifc.cpp: -------------------------------------------------------------------------------- 1 | #include "ttyifc.h" 2 | 3 | pthread_mutex_t g_mutex; 4 | pthread_t g_thread; 5 | std::queue sw2hwq; 6 | std::queue hw2swq; 7 | void* swmain(void* param); 8 | 9 | int tty_fd; 10 | 11 | bool g_init_done = false; 12 | void init() { 13 | if ( g_init_done ) return; 14 | pthread_mutex_init(&g_mutex, NULL); 15 | pthread_create(&g_thread, NULL, swmain, NULL); 16 | 17 | g_init_done = true; 18 | } 19 | 20 | uint8_t g_outidx = 0xff; 21 | //extern "C" 22 | uint32_t bdpiUartGet(uint8_t idx) { 23 | init(); 24 | 25 | uint32_t data = 0xffffffff; 26 | pthread_mutex_lock(&g_mutex); 27 | if ( idx != g_outidx && !sw2hwq.empty() ) { 28 | // get new data 29 | data = sw2hwq.front(); 30 | sw2hwq.pop(); 31 | //printf( "uart get %d %d %d -> %x\n", idx, g_outidx, sw2hwq.size(), data&0xff ); 32 | g_outidx = ((int)g_outidx+1)&0xff; 33 | } 34 | pthread_mutex_unlock(&g_mutex); 35 | return data; 36 | } 37 | 38 | uint8_t g_inidx = 0xff; 39 | //extern "C" 40 | void bdpiUartPut(uint32_t d) { 41 | init(); 42 | 43 | uint8_t idx = 0xff&(d>>8); 44 | uint8_t data = 0xff&d; 45 | if ( idx != g_inidx ) { 46 | g_inidx = idx; 47 | //printf( "--%d, %d\n",idx, data ); 48 | pthread_mutex_lock(&g_mutex); 49 | hw2swq.push(data); 50 | pthread_mutex_unlock(&g_mutex); 51 | } 52 | } 53 | 54 | uint32_t uart_recv() { 55 | uint32_t r = 0xffffffff; 56 | #ifdef SYNTH 57 | uint32_t din = 0; 58 | int rdlen = read(tty_fd, &din, 1); 59 | if ( rdlen > 0 ) r = din; 60 | if ( rdlen > 1 ) printf( "received too many bytes from uart! %d\n", rdlen ); 61 | #else 62 | init(); 63 | 64 | pthread_mutex_lock(&g_mutex); 65 | if ( !hw2swq.empty() ) { 66 | r = hw2swq.front(); 67 | hw2swq.pop(); 68 | } 69 | pthread_mutex_unlock(&g_mutex); 70 | #endif 71 | return r; 72 | } 73 | void uart_send(uint8_t data) { 74 | #ifdef SYNTH 75 | write(tty_fd, &data, sizeof(data)); 76 | tcdrain(tty_fd); 77 | #else 78 | init(); 79 | 80 | pthread_mutex_lock(&g_mutex); 81 | sw2hwq.push(data); 82 | pthread_mutex_unlock(&g_mutex); 83 | #endif 84 | } 85 | 86 | void set_tty_attributes(int fd, int baud) 87 | { 88 | struct termios tty; 89 | if (tcgetattr(fd, &tty) < 0) { 90 | printf("tcgetattr error: %s\n", strerror(errno)); 91 | return; 92 | } 93 | 94 | cfsetospeed(&tty, (speed_t)baud); 95 | cfsetispeed(&tty, (speed_t)baud); 96 | 97 | tty.c_cflag |= (CLOCAL | CREAD); 98 | tty.c_cflag &= ~CSIZE; 99 | tty.c_cflag |= CS8; 100 | tty.c_cflag &= ~PARENB; // no parity bit 101 | tty.c_cflag &= ~CSTOPB; // 1 stop bit 102 | tty.c_cflag &= ~CRTSCTS; // no flow control 103 | 104 | tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON); 105 | tty.c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN); 106 | 107 | tty.c_oflag &= ~OPOST; 108 | 109 | tty.c_cc[VMIN] = 0; 110 | tty.c_cc[VTIME] = 0; 111 | 112 | if (tcsetattr(fd, TCSANOW, &tty) != 0) { 113 | printf("tcsetattr error: %s\n", strerror(errno)); 114 | } 115 | return; 116 | } 117 | int open_tty(char* path){ 118 | char* ttyname = path; 119 | tty_fd = open(ttyname, O_RDWR | O_NOCTTY | O_SYNC); 120 | if (tty_fd < 0) { 121 | printf("Error opening TTY %s (%s)\n", ttyname, strerror(errno)); 122 | return 1; 123 | } 124 | set_tty_attributes(tty_fd, B115200); 125 | /* // not necessary since VMIN and VTIME set to zero...? 126 | int flags = fcntl(tty_fd, F_GETFL, 0); 127 | fcntl(tty_fd, F_SETFL, flags | O_NONBLOCK); 128 | */ 129 | return 0; 130 | } 131 | -------------------------------------------------------------------------------- /src/Uart.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | 3 | import "BDPI" function ActionValue#(Bit#(32)) bdpiUartGet(Bit#(8) idx); 4 | import "BDPI" function Action bdpiUartPut(Bit#(32) data); 5 | 6 | interface UartUserIfc; 7 | method Action send(Bit#(8) word); 8 | method ActionValue#(Bit#(8)) get; 9 | endinterface 10 | interface UartIfc; 11 | interface UartUserIfc user; 12 | 13 | (* always_ready *) 14 | method Bit#(1) serial_txd; 15 | (* always_enabled, always_ready, prefix = "", result = "serial_rxd" *) 16 | method Action serial_rx(Bit#(1) serial_rxd); 17 | endinterface 18 | 19 | module mkUart_bsim(UartUserIfc); 20 | FIFO#(Bit#(8)) inQ <- mkFIFO; 21 | FIFO#(Bit#(8)) outQ <- mkFIFO; 22 | Reg#(Bit#(8)) outCnt <- mkReg(0); 23 | Reg#(Bit#(8)) inReqId <- mkReg(0); 24 | rule relayOut; 25 | let d <- bdpiUartGet(inReqId); 26 | Bit#(8) data = truncate(d); 27 | Bit#(8) flg = truncate(d>>8); 28 | if ( flg == 0 ) begin 29 | inReqId <= inReqId + 1; 30 | outQ.enq(data); 31 | end 32 | endrule 33 | rule relayIn; 34 | inQ.deq; 35 | let d = inQ.first; 36 | outCnt <= outCnt + 1; 37 | bdpiUartPut({0,outCnt,d}); 38 | endrule 39 | 40 | method Action send(Bit#(8) word); 41 | inQ.enq(word); 42 | endmethod 43 | method ActionValue#(Bit#(8)) get; 44 | outQ.deq; 45 | return outQ.first; 46 | endmethod 47 | endmodule 48 | //Reg#(Bit#(16)) clkdiv <- mkReg(5000); // 48,000,000 / 9600 49 | 50 | 51 | /******* 52 | Parameters: 53 | clkdiv_ = clock hz / baud. e.g. 48MHz -> 48,000,000 / 9600 = 5000 54 | 55 | Terminal: 56 | 8 bits, no parity, 1 stop bit, no flow control 57 | *******/ 58 | 59 | 60 | module mkUart#(Integer clkdiv_) (UartIfc); 61 | 62 | FIFO#(Bit#(8)) outQ <- mkFIFO; 63 | FIFO#(Bit#(8)) inQ <- mkFIFO; 64 | Bit#(16) clkdiv = fromInteger(clkdiv_); 65 | //Reg#(Bit#(16)) clkdiv <- mkReg(5000); // 48,000,000 / 9600 66 | Reg#(Bit#(16)) clkcnt <- mkReg(0); 67 | 68 | Reg#(Bit#(1)) txdr <- mkReg(1); 69 | Reg#(Bit#(11)) curoutd <- mkReg(0); 70 | Reg#(Bit#(5)) curoutoff <- mkReg(0); 71 | rule outcntclk; 72 | if ( clkcnt + 1 >= clkdiv ) begin 73 | clkcnt <= 0; 74 | 75 | if ( curoutoff != 0 ) begin 76 | curoutd <= {1,curoutd[10:1]}; 77 | txdr <= curoutd[0]; 78 | curoutoff <= curoutoff - 1; 79 | end else begin 80 | inQ.deq; 81 | let word = inQ.first; 82 | curoutd <= {2'b11,word,1'b0}; 83 | curoutoff <= 11; 84 | end 85 | end else begin 86 | clkcnt <= clkcnt + 1; 87 | end 88 | endrule 89 | 90 | Wire#(Bit#(1)) inw <- mkDWire(1); 91 | Reg#(Bit#(16)) samplecountdown <- mkReg(0); 92 | Reg#(Bit#(4)) bleft <- mkReg(0); 93 | Reg#(Bit#(8)) outword <- mkReg(0); 94 | rule insample; 95 | if ( bleft == 0 && inw == 0 ) begin 96 | bleft <= (8+1); 97 | samplecountdown <= (clkdiv*6)/4; 98 | end 99 | else if ( bleft != 0 ) begin 100 | if ( samplecountdown != 0 ) begin 101 | samplecountdown <= samplecountdown - 1; 102 | end else begin 103 | samplecountdown <= clkdiv; 104 | outword <= {inw,outword[7:1]}; 105 | bleft <= bleft - 1; 106 | if ( bleft == 1 ) begin 107 | outQ.enq(outword); 108 | end 109 | end 110 | end 111 | endrule 112 | 113 | 114 | 115 | 116 | 117 | Reg#(Bit#(4)) rxin <- mkReg(4'b1111); 118 | interface UartUserIfc user; 119 | method Action send(Bit#(8) word);// if ( curoutoff == 0 ); 120 | inQ.enq(word); 121 | //curoutd <= {2'b11,word,1'b0}; 122 | //curoutoff <= 11; 123 | endmethod 124 | method ActionValue#(Bit#(8)) get; 125 | outQ.deq; 126 | return outQ.first; 127 | endmethod 128 | endinterface 129 | method Bit#(1) serial_txd; 130 | return txdr; 131 | endmethod 132 | method Action serial_rx(Bit#(1) serial_rxd); 133 | rxin <= {serial_rxd,rxin[3:1]}; 134 | inw <= (rxin==0)?0:1; 135 | endmethod 136 | endmodule 137 | -------------------------------------------------------------------------------- /src/bsv_verilog/FIFO1.v: -------------------------------------------------------------------------------- 1 | 2 | `ifdef BSV_ASSIGNMENT_DELAY 3 | `else 4 | `define BSV_ASSIGNMENT_DELAY 5 | `endif 6 | 7 | `ifdef BSV_POSITIVE_RESET 8 | `define BSV_RESET_VALUE 1'b1 9 | `define BSV_RESET_EDGE posedge 10 | `else 11 | `define BSV_RESET_VALUE 1'b0 12 | `define BSV_RESET_EDGE negedge 13 | `endif 14 | 15 | `ifdef BSV_ASYNC_RESET 16 | `define BSV_ARESET_EDGE_META or `BSV_RESET_EDGE RST 17 | `else 18 | `define BSV_ARESET_EDGE_META 19 | `endif 20 | 21 | `ifdef BSV_RESET_FIFO_HEAD 22 | `define BSV_ARESET_EDGE_HEAD `BSV_ARESET_EDGE_META 23 | `else 24 | `define BSV_ARESET_EDGE_HEAD 25 | `endif 26 | 27 | // Depth 1 FIFO 28 | module FIFO1(CLK, 29 | RST, 30 | D_IN, 31 | ENQ, 32 | FULL_N, 33 | D_OUT, 34 | DEQ, 35 | EMPTY_N, 36 | CLR 37 | ); 38 | 39 | parameter width = 1; 40 | parameter guarded = 1'b1; 41 | input CLK; 42 | input RST; 43 | input [width - 1 : 0] D_IN; 44 | input ENQ; 45 | input DEQ; 46 | input CLR ; 47 | 48 | output FULL_N; 49 | output [width - 1 : 0] D_OUT; 50 | output EMPTY_N; 51 | 52 | reg [width - 1 : 0] D_OUT; 53 | reg empty_reg ; 54 | 55 | 56 | assign EMPTY_N = empty_reg ; 57 | 58 | 59 | `ifdef BSV_NO_INITIAL_BLOCKS 60 | `else // not BSV_NO_INITIAL_BLOCKS 61 | // synopsys translate_off 62 | initial 63 | begin 64 | D_OUT = {((width + 1)/2) {2'b10}} ; 65 | empty_reg = 1'b0 ; 66 | end // initial begin 67 | // synopsys translate_on 68 | `endif // BSV_NO_INITIAL_BLOCKS 69 | 70 | 71 | assign FULL_N = !empty_reg; 72 | 73 | always@(posedge CLK `BSV_ARESET_EDGE_META) 74 | begin 75 | if (RST == `BSV_RESET_VALUE) 76 | begin 77 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b0; 78 | end // if (RST == `BSV_RESET_VALUE) 79 | else 80 | begin 81 | if (CLR) 82 | begin 83 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b0; 84 | end // if (CLR) 85 | else if (ENQ) 86 | begin 87 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b1; 88 | end // if (ENQ) 89 | else if (DEQ) 90 | begin 91 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b0; 92 | end // if (DEQ) 93 | end // else: !if(RST == `BSV_RESET_VALUE) 94 | end // always@ (posedge CLK or `BSV_RESET_EDGE RST) 95 | 96 | always@(posedge CLK `BSV_ARESET_EDGE_HEAD) 97 | begin 98 | `ifdef BSV_RESET_FIFO_HEAD 99 | if (RST == `BSV_RESET_VALUE) 100 | begin 101 | D_OUT <= `BSV_ASSIGNMENT_DELAY {width {1'b0}} ; 102 | end 103 | else 104 | `endif 105 | begin 106 | if (ENQ) 107 | D_OUT <= `BSV_ASSIGNMENT_DELAY D_IN; 108 | end // else: !if(RST == `BSV_RESET_VALUE) 109 | end // always@ (posedge CLK or `BSV_RESET_EDGE RST) 110 | 111 | // synopsys translate_off 112 | always@(posedge CLK) 113 | begin: error_checks 114 | reg deqerror, enqerror ; 115 | 116 | deqerror = 0; 117 | enqerror = 0; 118 | if (RST == ! `BSV_RESET_VALUE) 119 | begin 120 | if ( ! empty_reg && DEQ ) 121 | begin 122 | deqerror = 1 ; 123 | $display( "Warning: FIFO1: %m -- Dequeuing from empty fifo" ) ; 124 | end 125 | if ( ! FULL_N && ENQ && (!DEQ || guarded) ) 126 | begin 127 | enqerror = 1 ; 128 | $display( "Warning: FIFO1: %m -- Enqueuing to a full fifo" ) ; 129 | end 130 | end // if (RST == ! `BSV_RESET_VALUE) 131 | end 132 | // synopsys translate_on 133 | 134 | endmodule 135 | 136 | -------------------------------------------------------------------------------- /src/bsv_verilog/BRAM2.v: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2000-2011 Bluespec, Inc. 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // $Revision: 28325 $ 22 | // $Date: 2012-04-25 18:22:57 +0000 (Wed, 25 Apr 2012) $ 23 | 24 | `ifdef BSV_ASSIGNMENT_DELAY 25 | `else 26 | `define BSV_ASSIGNMENT_DELAY 27 | `endif 28 | 29 | // Dual-Ported BRAM (WRITE FIRST) 30 | module BRAM2(CLKA, 31 | ENA, 32 | WEA, 33 | ADDRA, 34 | DIA, 35 | DOA, 36 | CLKB, 37 | ENB, 38 | WEB, 39 | ADDRB, 40 | DIB, 41 | DOB 42 | ); 43 | 44 | parameter PIPELINED = 0; 45 | parameter ADDR_WIDTH = 1; 46 | parameter DATA_WIDTH = 1; 47 | parameter MEMSIZE = 1; 48 | 49 | input CLKA; 50 | input ENA; 51 | input WEA; 52 | input [ADDR_WIDTH-1:0] ADDRA; 53 | input [DATA_WIDTH-1:0] DIA; 54 | output [DATA_WIDTH-1:0] DOA; 55 | 56 | input CLKB; 57 | input ENB; 58 | input WEB; 59 | input [ADDR_WIDTH-1:0] ADDRB; 60 | input [DATA_WIDTH-1:0] DIB; 61 | output [DATA_WIDTH-1:0] DOB; 62 | 63 | reg [DATA_WIDTH-1:0] RAM[0:MEMSIZE-1] /* synthesis syn_ramstyle="no_rw_check" */ ; 64 | reg [DATA_WIDTH-1:0] DOA_R; 65 | reg [DATA_WIDTH-1:0] DOB_R; 66 | reg [DATA_WIDTH-1:0] DOA_R2; 67 | reg [DATA_WIDTH-1:0] DOB_R2; 68 | 69 | `ifdef BSV_NO_INITIAL_BLOCKS 70 | `else 71 | // synopsys translate_off 72 | integer i; 73 | initial 74 | begin : init_block 75 | for (i = 0; i < MEMSIZE; i = i + 1) begin 76 | RAM[i] = { ((DATA_WIDTH+1)/2) { 2'b10 } }; 77 | end 78 | DOA_R = { ((DATA_WIDTH+1)/2) { 2'b10 } }; 79 | DOB_R = { ((DATA_WIDTH+1)/2) { 2'b10 } }; 80 | DOA_R2 = { ((DATA_WIDTH+1)/2) { 2'b10 } }; 81 | DOB_R2 = { ((DATA_WIDTH+1)/2) { 2'b10 } }; 82 | end 83 | // synopsys translate_on 84 | `endif // !`ifdef BSV_NO_INITIAL_BLOCKS 85 | 86 | always @(posedge CLKA) begin 87 | if (ENA) begin 88 | if (WEA) begin 89 | RAM[ADDRA] <= `BSV_ASSIGNMENT_DELAY DIA; 90 | DOA_R <= `BSV_ASSIGNMENT_DELAY DIA; 91 | end 92 | else begin 93 | DOA_R <= `BSV_ASSIGNMENT_DELAY RAM[ADDRA]; 94 | end 95 | end 96 | DOA_R2 <= `BSV_ASSIGNMENT_DELAY DOA_R; 97 | end 98 | 99 | always @(posedge CLKB) begin 100 | if (ENB) begin 101 | if (WEB) begin 102 | RAM[ADDRB] <= `BSV_ASSIGNMENT_DELAY DIB; 103 | DOB_R <= `BSV_ASSIGNMENT_DELAY DIB; 104 | end 105 | else begin 106 | DOB_R <= `BSV_ASSIGNMENT_DELAY RAM[ADDRB]; 107 | end 108 | end 109 | DOB_R2 <= `BSV_ASSIGNMENT_DELAY DOB_R; 110 | end 111 | 112 | // Output drivers 113 | assign DOA = (PIPELINED) ? DOA_R2 : DOA_R; 114 | assign DOB = (PIPELINED) ? DOB_R2 : DOB_R; 115 | 116 | endmodule // BRAM2 117 | -------------------------------------------------------------------------------- /projects/rv32i/Top.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import Uart::*; 5 | import BRAMSubWord::*; 6 | 7 | import Defines::*; 8 | import Processor::*; 9 | 10 | interface HwMainIfc; 11 | method ActionValue#(Bit#(8)) serial_tx; 12 | method Action serial_rx(Bit#(8) rx); 13 | endinterface 14 | 15 | module mkHwMain(HwMainIfc); 16 | Reg#(Bool) processorStart <- mkReg(False); 17 | ProcessorIfc proc <- mkProcessor; 18 | Reg#(Bit#(12)) imembytes <- mkReg(0); 19 | BRAMSubWord4Ifc#(12) imem <- mkBRAMSubWord; // 4KB 20 | Reg#(Bit#(12)) dmembytes <- mkReg(0); 21 | BRAMSubWord4Ifc#(12) dmem <- mkBRAMSubWord; // 4KB 22 | 23 | rule relayImemReq (processorStart); 24 | let req <- proc.iMemReq; 25 | imem.req(truncate(req.addr), req.word, req.bytes, req.write); 26 | endrule 27 | FIFO#(Bit#(8)) serialtxQ <- mkFIFO; 28 | rule relayDmemReq (processorStart); 29 | let req <- proc.dMemReq; 30 | if ( 0 == (req.addr>>12)) begin 31 | if ( req.write) begin 32 | serialtxQ.enq(truncate(req.word)); 33 | //$write("Writing! %x(%d) %x\n", req.addr,req.bytes, req.word); 34 | end else begin 35 | //$write("Reading! %x(%d) %x\n", req.addr,req.bytes, req.word); 36 | end 37 | end else begin 38 | dmem.req(truncate(req.addr), req.word, req.bytes, req.write); // truncating address should work automatically 39 | //$write("Reading! %x(%d) %x\n", req.addr,req.bytes, req.word); 40 | end 41 | endrule 42 | 43 | 44 | 45 | 46 | Reg#(Maybe#(Bit#(8))) serialCmd <- mkReg(tagged Invalid); 47 | 48 | rule procimemread; 49 | let d <- imem.resp; 50 | proc.iMemResp(d); 51 | //$write( "imem resp %x\n", d ); 52 | endrule 53 | rule procdmemread; 54 | let d <- dmem.resp; 55 | proc.dMemResp(d); 56 | //$write( "dmem resp %x\n",d ); 57 | endrule 58 | 59 | FIFO#(Bit#(8)) serialrxQ <- mkFIFO; 60 | rule procSerialRx; 61 | let d = serialrxQ.first; 62 | serialrxQ.deq; 63 | 64 | if ( !isValid(serialCmd) ) begin 65 | serialCmd <= tagged Valid d; 66 | end else begin 67 | serialCmd <= tagged Invalid; 68 | Bit#(8) cmd = fromMaybe(?, serialCmd); 69 | if ( cmd[0] == 0 ) begin // mem IO 70 | if ( cmd[1] == 0 ) begin // imem 71 | //if ( cmd[2] == 0 ) begin // mem write 72 | imembytes <= imembytes + 1; 73 | imem.req(imembytes, zeroExtend(d), 0, True); // write 1 byte to bwoff 74 | //end else begin // mem read 75 | // imem.req(imembytes, zeroExtend(d), 0, False); // read 1 byte from bwoff 76 | //end 77 | end else begin // dmem 78 | //if ( cmd[2] == 0 ) begin // mem write 79 | dmembytes <= dmembytes + 1; 80 | dmem.req(dmembytes, zeroExtend(d), 0, True); // write 1 byte to bwoff 81 | //end else begin // mem read 82 | // dmem.req(dmembytes, zeroExtend(d), 0, False); // read 1 byte from bwoff 83 | //end 84 | end 85 | end 86 | else begin // non-mem cmd, or mem cmd issued when processor started... 87 | $write( "Processor starting\n" ); 88 | processorStart <= True; 89 | end 90 | end 91 | endrule 92 | 93 | method ActionValue#(Bit#(8)) serial_tx; 94 | serialtxQ.deq; 95 | return serialtxQ.first; 96 | endmethod 97 | method Action serial_rx(Bit#(8) d); 98 | serialrxQ.enq(d); 99 | endmethod 100 | endmodule 101 | 102 | interface TopIfc; 103 | (* always_ready *) 104 | method Bit#(1) ftdi_txd; 105 | (* always_enabled, always_ready, prefix = "", result = "serial_rxd" *) 106 | method Action ftdi_rx(Bit#(1) ftdi_rxd); 107 | endinterface 108 | 109 | (* no_default_clock, no_default_reset*) 110 | module mkTop#(Clock clk_25mhz)(TopIfc); 111 | Reset rst_null = noReset(); 112 | UartIfc uart <- mkUart(2604, clocked_by clk_25mhz, reset_by rst_null); 113 | 114 | HwMainIfc main <- mkHwMain(clocked_by clk_25mhz, reset_by rst_null); 115 | 116 | rule relayUartIn; 117 | Bit#(8) d <- uart.user.get; 118 | main.serial_rx(d); 119 | endrule 120 | rule relllll; 121 | let d <- main.serial_tx; 122 | uart.user.send(d); 123 | endrule 124 | 125 | 126 | method Bit#(1) ftdi_txd; 127 | return uart.serial_txd; 128 | endmethod 129 | method Action ftdi_rx(Bit#(1) ftdi_rxd); 130 | uart.serial_rx(ftdi_rxd); 131 | endmethod 132 | endmodule 133 | 134 | module mkTop_bsim(Empty); 135 | HwMainIfc main <- mkHwMain; 136 | UartUserIfc uart <- mkUart_bsim; 137 | rule relayUartIn; 138 | let d <- uart.get; 139 | main.serial_rx(d); 140 | endrule 141 | rule relayUartOut; 142 | let d <- main.serial_tx; 143 | //$write( "uart sending serial\n" ); 144 | uart.send(d); 145 | endrule 146 | endmodule 147 | -------------------------------------------------------------------------------- /projects/nn_fc/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import Uart::*; 5 | import BRAMSubWord::*; 6 | import PLL::*; 7 | import Sdram::*; 8 | 9 | import Mult18x18D::*; 10 | import SimpleFloat::*; 11 | 12 | import NnFc::*; 13 | 14 | 15 | interface HwMainIfc; 16 | method ActionValue#(Bit#(8)) serial_tx; 17 | method Action serial_rx(Bit#(8) rx); 18 | endinterface 19 | 20 | module mkHwMain#(Ulx3sSdramUserIfc mem) (HwMainIfc); 21 | Clock curclk <- exposeCurrentClock; 22 | Reset currst <- exposeCurrentReset; 23 | 24 | Reg#(Bit#(32)) cycleCount <- mkReg(0); 25 | rule incCycleCount; 26 | cycleCount <= cycleCount + 1; 27 | endrule 28 | 29 | FIFO#(Bit#(8)) serialrxQ <- mkFIFO; 30 | FIFO#(Bit#(8)) serialtxQ <- mkFIFO; 31 | 32 | NnFcIfc nn <- mkNnFc; 33 | 34 | Reg#(Maybe#(Bit#(8))) inputDst <- mkReg(tagged Invalid); 35 | rule recvInputDst(!isValid(inputDst)); 36 | Bit#(8) charin = serialrxQ.first(); 37 | serialrxQ.deq; 38 | inputDst <= tagged Valid charin; 39 | endrule 40 | Reg#(Bit#(32)) inputBuffer <- mkReg(0); 41 | Reg#(Bit#(2)) inputBufferCnt <- mkReg(0); 42 | //Reg#(Bit#(24)) memWriteOffset <- mkReg(0); 43 | Reg#(Bool) memWriteDone <- mkReg(False); 44 | FIFO#(Bit#(32)) memWriteQ <- mkFIFO; 45 | rule recvInputFloat(isValid(inputDst)); 46 | Bit#(8) charin = serialrxQ.first(); 47 | serialrxQ.deq; 48 | Bit#(32) nv = (inputBuffer>>8)|(zeroExtend(charin)<<24); 49 | inputBuffer <= nv; 50 | 51 | if ( inputBufferCnt == 3 ) begin 52 | inputBufferCnt <= 0; 53 | inputDst <= tagged Invalid; 54 | let id = fromMaybe(?,inputDst); 55 | if ( id != 8'hff ) begin 56 | nn.dataIn(unpack(nv), id); 57 | memWriteDone <= True; 58 | end else begin 59 | memWriteQ.enq(nv); 60 | // write to mem 61 | //$write( "Writing %x to mem %d\n", nv, memWriteOffset ); 62 | //memWriteOffset <= memWriteOffset + 1; 63 | end 64 | end else begin 65 | inputBufferCnt <= inputBufferCnt + 1; 66 | end 67 | endrule 68 | 69 | Reg#(Bit#(40)) outputBuffer <- mkReg(0); // {float,outidx}, inidx is sent immediately 70 | Reg#(Bit#(3)) outputBufferCnt <- mkReg(0); 71 | Reg#(Bit#(32)) resultDataCount <- mkReg(0); 72 | Reg#(Bit#(32)) lastCycle <- mkReg(0); 73 | Reg#(Bit#(32)) lastEmitted <- mkReg(0); 74 | rule serializeOutput; 75 | if ( outputBufferCnt > 0 ) begin 76 | outputBufferCnt <= outputBufferCnt - 1; 77 | serialtxQ.enq(truncate(outputBuffer)); 78 | outputBuffer <= (outputBuffer>>8); 79 | end else begin 80 | if ( resultDataCount == 0 ) begin 81 | lastCycle <= cycleCount; 82 | lastEmitted <= resultDataCount; 83 | end 84 | else if (((resultDataCount + 1)&32'hff) == 0 ) begin 85 | $write( "Emitting %d elements over %d cycles\n", resultDataCount-lastEmitted, cycleCount-lastCycle ); 86 | lastCycle <= cycleCount; 87 | lastEmitted <= resultDataCount; 88 | end 89 | resultDataCount <= resultDataCount + 1; 90 | let r <- nn.dataOut; 91 | serialtxQ.enq(tpl_2(r)); // input idx first 92 | outputBuffer <= {pack(tpl_1(r)),tpl_3(r)}; 93 | outputBufferCnt <= 5; 94 | end 95 | endrule 96 | 97 | 98 | 99 | Reg#(Maybe#(Bit#(16))) memWriteBuffer <- mkReg(tagged Invalid); 100 | Reg#(Bit#(24)) memWriteAddr <- mkReg(0); 101 | rule procMemWrite; 102 | if ( isValid(memWriteBuffer) ) begin 103 | memWriteBuffer <= tagged Invalid; 104 | mem.req(memWriteAddr,fromMaybe(?,memWriteBuffer),True); 105 | end else begin 106 | memWriteQ.deq; 107 | let d = memWriteQ.first; 108 | mem.req(memWriteAddr,truncate(d),True); 109 | memWriteBuffer <= tagged Valid truncate(d>>16); 110 | end 111 | memWriteAddr <= memWriteAddr + 1; 112 | endrule 113 | 114 | Reg#(Bit#(24)) memReadAddr <- mkReg(0); 115 | (* descending_urgency = "procMemWrite, procMemRead" *) 116 | rule procMemRead (memWriteDone); 117 | if ( memReadAddr + 1 == memWriteAddr ) memReadAddr <= 0; 118 | else memReadAddr <= memReadAddr + 1; 119 | mem.req(memReadAddr,?,False); 120 | endrule 121 | 122 | Reg#(Maybe#(Bit#(16))) memReadBuffer <- mkReg(tagged Invalid); 123 | rule procMemReadResp; 124 | let d <- mem.readResp; 125 | if ( isValid(memReadBuffer) ) begin 126 | nn.weightIn(unpack({d,fromMaybe(?,memReadBuffer)})); 127 | memReadBuffer <= tagged Invalid; 128 | end else begin 129 | memReadBuffer <= tagged Valid d; 130 | end 131 | endrule 132 | 133 | method ActionValue#(Bit#(8)) serial_tx; 134 | serialtxQ.deq; 135 | return serialtxQ.first(); 136 | endmethod 137 | method Action serial_rx(Bit#(8) d); 138 | serialrxQ.enq(d); 139 | endmethod 140 | endmodule 141 | -------------------------------------------------------------------------------- /src/bsv_verilog/ClockDiv.v: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2000-2012 Bluespec, Inc. 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and this permission notice shall be included in 12 | // all copies or substantial portions of the Software. 13 | 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | // THE SOFTWARE. 21 | // 22 | // $Revision: 29441 $ 23 | // $Date: 2012-08-27 21:58:03 +0000 (Mon, 27 Aug 2012) $ 24 | 25 | `ifdef BSV_ASSIGNMENT_DELAY 26 | `else 27 | `define BSV_ASSIGNMENT_DELAY 28 | `endif 29 | 30 | `ifdef BSV_POSITIVE_RESET 31 | `define BSV_RESET_VALUE 1'b1 32 | `define BSV_RESET_EDGE posedge 33 | `else 34 | `define BSV_RESET_VALUE 1'b0 35 | `define BSV_RESET_EDGE negedge 36 | `endif 37 | 38 | 39 | // A clock divider circuit. 40 | // Division is based on the parameters, where 41 | // Division is upper - lower + 1 42 | // Duty cycle is : 43 | // let half = 1 << (width-1) 44 | // (upper - half) / upper - lower + 1 45 | // E.g., (2,1,3) is a divide by 3 duty cycle 2/3 46 | // (2,0,3) is a divide by 4 duty cycle 2/4 47 | // (1,0,1) is a divide by 2, duty cycle 1/2 48 | // (3,1,5) is a divide by 5 duty cycle 2/5 49 | // (3,2,6) is a divide by 5 duty cycle 3/5 50 | // The offset allow edges for seperate modules to be determined 51 | // relative to each other. a clock divider with offset 1 occurs one 52 | // (fast) clock later than a clock with offset 0. 53 | module ClockDiv(CLK_IN, RST, PREEDGE, CLK_OUT); 54 | 55 | parameter width = 2 ; // must be sufficient to hold upper 56 | parameter lower = 1 ; // 57 | parameter upper = 3 ; 58 | parameter offset = 0; // offset for relative edges. 59 | // (0 <= offset <= (upper - lower) 60 | 61 | input CLK_IN; // input clock 62 | input RST; 63 | 64 | output PREEDGE; // output signal announcing an upcoming edge 65 | output CLK_OUT; // output clock 66 | 67 | reg [ width -1 : 0 ] cntr ; 68 | reg PREEDGE ; 69 | 70 | // Wire constants for the parameters 71 | wire [width-1:0] upper_w ; 72 | wire [width-1:0] lower_w ; 73 | 74 | assign CLK_OUT = cntr[width-1] ; 75 | assign upper_w = upper ; 76 | assign lower_w = lower ; 77 | 78 | // The clock is about to tick when counter is about to set its msb 79 | // Note some simulators do not allow 0 width expressions 80 | wire [width-1:0] nexttick = ~ ( 'b01 << (width-1) ) ; 81 | 82 | // Combinational block to generate next edge signal 83 | always@( cntr or nexttick ) 84 | begin 85 | #0 86 | // The nonblocking assignment use to delay the update of the edge ready signal 87 | // Since this read by other always blocks trigger by the output CLK of this module 88 | PREEDGE <= `BSV_ASSIGNMENT_DELAY (cntr == nexttick) ; 89 | end 90 | 91 | always@( posedge CLK_IN or `BSV_RESET_EDGE RST ) 92 | begin 93 | // The use of blocking assignment within this block insures 94 | // that the clock generated from cntr[MSB] occurs before any 95 | // LHS of nonblocking assigments also from CLK_IN occur. 96 | // Basically, this insures that CLK_OUT and CLK_IN occur within 97 | // the same phase of the execution cycle, before any state 98 | // updates occur. see 99 | // http://www.sunburst-design.com/papers/CummingsSNUG2002Boston_NBAwithDelays.pdf 100 | 101 | if ( RST == `BSV_RESET_VALUE ) 102 | cntr = upper - offset ; 103 | else 104 | begin 105 | if ( cntr < upper_w ) 106 | cntr = cntr + 1 ; 107 | else 108 | cntr = lower_w ; 109 | end // else: !if( RST == `BSV_RESET_VALUE ) 110 | end // always@ ( posedge CLK_IN or `BSV_RESET_EDGE RST ) 111 | 112 | `ifdef BSV_NO_INITIAL_BLOCKS 113 | `else // not BSV_NO_INITIAL_BLOCKS 114 | // synopsys translate_off 115 | initial 116 | begin 117 | #0 ; 118 | cntr = (upper - offset) ; 119 | PREEDGE = 0 ; 120 | end // initial begin 121 | // synopsys translate_on 122 | `endif // BSV_NO_INITIAL_BLOCKS 123 | 124 | 125 | endmodule // ClockDiv 126 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/ZfpCompress.bsv: -------------------------------------------------------------------------------- 1 | import Connectable::*; 2 | import FIFO::*; 3 | import FIFOF::*; 4 | import Vector::*; 5 | import SimpleFloat::*; 6 | import FloatingPoint::*; 7 | 8 | typedef 33 ZfpComprTypeSz; // e = 9, block = bit_budget(5)*4 = 20, flag = (0 or 1)*4 = 4 9 | typedef TSub#(ZfpComprTypeSz,9) CompressedBitsTotal; 10 | typedef TDiv#(CompressedBitsTotal,4) CompressedBitsEach; 11 | typedef TDiv#(TSub#(CompressedBitsTotal,4),4) BitBudget; 12 | 13 | Integer total = 33; 14 | Integer compressed_total = total - 9; 15 | Integer compressed_each = compressed_total/4; 16 | Integer budget = compressed_each - 1; 17 | 18 | interface ZfpCompressIfc; 19 | method Action put(Bit#(32) data); 20 | method ActionValue#(Bit#(ZfpComprTypeSz)) get; 21 | endinterface 22 | 23 | function Bit#(32) int_to_uint(Bit#(32) t); 24 | return (t + 32'haaaaaaaa) ^ 32'haaaaaaaa; 25 | endfunction 26 | 27 | function Bit#(32) intShift(Bit#(32) t); 28 | Bit#(1) s; 29 | s = t[31]; 30 | t = t >> 1; 31 | t[31] = s; 32 | return t; 33 | endfunction 34 | 35 | (* synthesize *) 36 | module mkZfpCompress (ZfpCompressIfc); 37 | 38 | FIFO#(Bit#(32)) inputQ <- mkFIFO; 39 | 40 | FIFO#(Vector#(4,Bit#(32))) toLift_1 <- mkFIFO; 41 | FIFO#(Vector#(4,Bit#(32))) toLift_2 <- mkFIFO; 42 | FIFO#(Vector#(4,Bit#(32))) toLift_3 <- mkFIFO; 43 | FIFO#(Vector#(4,Bit#(32))) toLift_4 <- mkFIFO; 44 | FIFO#(Vector#(4,Bit#(32))) toConvertBits <- mkFIFO; 45 | FIFO#(Vector#(4,Bit#(32))) toEncode <- mkFIFO; 46 | 47 | FIFO#(Bit#(9)) eQ <- mkSizedFIFO(32); 48 | FIFO#(Bit#(CompressedBitsTotal)) toGetResult <- mkFIFO; 49 | FIFO#(Bit#(ZfpComprTypeSz)) outputQ <- mkFIFO; 50 | 51 | Reg#(Bit#(3)) inputCnt <- mkReg(0); 52 | Reg#(Bit#(8)) expMax <- mkReg(0); 53 | Vector#(4,Reg#(Bit#(32))) inputBuffer <- replicateM(mkReg(0)); 54 | rule getMaxExp; 55 | inputQ.deq; 56 | let d = inputQ.first; 57 | Vector#(4,Bit#(32)) in = replicate(0); 58 | Vector#(4,Bit#(32)) idata = replicate(0); 59 | Bit#(8) matrixExp = 0; 60 | Bit#(9) e = 0; 61 | 62 | if ( inputCnt == fromInteger(4) ) begin 63 | for ( Integer i = 0; i < 4; i = i + 1 ) begin 64 | in[i] = inputBuffer[i]; 65 | end 66 | inputBuffer[0] <= d; 67 | inputCnt <= 1; 68 | 69 | e = zeroExtend(expMax) + fromInteger(127); 70 | 71 | for ( Integer i = 0; i < 4; i = i + 1 ) begin 72 | idata[i] = in[i] << ((32-2) - expMax); 73 | end 74 | 75 | matrixExp = truncateLSB(d<<1); 76 | expMax <= matrixExp; 77 | toLift_1.enq(idata); 78 | eQ.enq(e); 79 | end else begin 80 | matrixExp = truncateLSB(d<<1); 81 | 82 | if ( inputCnt == fromInteger(0) ) begin 83 | expMax <= matrixExp; 84 | end else begin 85 | if ( matrixExp > expMax ) expMax <= matrixExp; 86 | end 87 | 88 | inputBuffer[inputCnt] <= d; 89 | inputCnt <= inputCnt + 1; 90 | end 91 | endrule 92 | 93 | rule lift; 94 | toLift_1.deq; 95 | let in = toLift_1.first; 96 | in[0] = (in[0]+in[3]); in[0] = intShift(in[0]); in[3] = (in[3]-in[0]); 97 | in[2] = (in[2]+in[1]); 98 | toLift_2.enq(in); 99 | endrule 100 | rule lift_2; 101 | toLift_2.deq; 102 | let in = toLift_2.first; 103 | in[2] = intShift(in[2]); in[1] = (in[1]-in[2]); 104 | in[0] = (in[0]+in[2]); in[0] = intShift(in[0]); 105 | toLift_3.enq(in); 106 | endrule 107 | rule lift_3; 108 | toLift_3.deq; 109 | let in = toLift_3.first; 110 | in[2] = (in[2]-in[0]); 111 | in[3] = (in[3]+in[1]); in[3] = intShift(in[3]); in[1] = (in[1]-in[3]); 112 | toLift_4.enq(in); 113 | endrule 114 | rule lift_4; 115 | toLift_4.deq; 116 | let in = toLift_4.first; 117 | in[3] = (in[3]+ intShift(in[1])); in[1] = (in[1] - (intShift(in[3]))); 118 | toConvertBits.enq(in); 119 | endrule 120 | 121 | rule convertBits; 122 | toConvertBits.deq; 123 | Vector#(4,Bit#(32)) in = toConvertBits.first; 124 | for (Bit#(5)i = 0; i < 4; i = i + 1) begin 125 | in[i] = int_to_uint(in[i]); 126 | end 127 | toEncode.enq(in); 128 | endrule 129 | 130 | rule encode; 131 | toEncode.deq; 132 | Vector#(4,Bit#(32)) udata = toEncode.first; 133 | Vector#(4,Bit#(32)) udataCand_1 = replicate(0); 134 | Vector#(4,Bit#(32)) udataCand_2 = replicate(0); 135 | Bit#(CompressedBitsTotal) out = 0; 136 | 137 | for ( Bit#(5) i = 0; i < 4; i = i + 1 ) begin 138 | udataCand_1[i] = udata[i]>>((32-4)-budget); 139 | end 140 | 141 | for ( Bit#(5) i = 0; i < 4; i = i + 1 ) begin 142 | udataCand_2[i] = udata[i]>>(32-budget); 143 | end 144 | 145 | for ( Integer i = 0; i < 4; i = i+1 ) begin 146 | if ( (udata[i]>>28) == 0 ) begin 147 | out[i*compressed_each] = 0; 148 | out[budget+(i*compressed_each):(i*compressed_each)+1] = udataCand_1[i][4:0]; 149 | end else begin 150 | out[i*compressed_each] = 1; 151 | out[budget+(i*compressed_each):(i*compressed_each)+1] = udataCand_2[i][4:0]; 152 | end 153 | end 154 | toGetResult.enq(out); 155 | endrule 156 | 157 | rule getResult; 158 | toGetResult.deq; 159 | eQ.deq; 160 | Bit#(CompressedBitsTotal) d = toGetResult.first; 161 | Bit#(9) e = eQ.first; 162 | Bit#(ZfpComprTypeSz) comp = 0; 163 | 164 | comp[8:0] = e; 165 | comp[(total-1):9] = d; 166 | 167 | outputQ.enq(comp); 168 | endrule 169 | 170 | method Action put(Bit#(32) data); 171 | inputQ.enq(data); 172 | endmethod 173 | method ActionValue#(Bit#(ZfpComprTypeSz)) get; 174 | outputQ.deq; 175 | return outputQ.first; 176 | endmethod 177 | endmodule 178 | -------------------------------------------------------------------------------- /src/verilog/sdram.v: -------------------------------------------------------------------------------- 1 | // taken from "sdram_pnru.v" 2 | // 3 | // Simplistic SDRAM controller for 4x4Mx16 chips (e.g. micron MT48LC16M16A2) 4 | // - makes one sdram access for each system access 5 | // - can keep all 4 banks open 6 | // - distributed refresh 7 | // 8 | // This source code is public domain 9 | // 10 | module sdram_pnru ( 11 | // system interface 12 | input wire sys_clk, // CLK 125 Mhz 13 | input wire sys_rd, // read word 14 | input wire sys_wr, // write word 15 | output reg sys_rdy = 1'b0, // mem ready 16 | input wire sys_ack, // mem cycle end 17 | input wire [23:0] sys_ab, // address 18 | input wire [15:0] sys_di, // data in 19 | output reg [15:0] sys_do, // data out 20 | 21 | // sdram interface 22 | output wire [3:0] sdr_n_CS_WE_RAS_CAS, // SDRAM nCS, nWE, nRAS, nCAS 23 | output wire [1:0] sdr_ba, // SDRAM bank address 24 | output reg [12:0] sdr_ab, // SDRAM address 25 | inout wire [15:0] sdr_db, // SDRAM data 26 | output reg [1:0] sdr_dqm = 2'b11 // SDRAM DQM 27 | ); 28 | 29 | // SDRAM timing parameters 30 | localparam tRP = 3, tMRD = 2, tRCD = 3, tRC = 9, CL = 3; 31 | localparam INITLEN = 12500; // 125 * 100us; 32 | localparam RFTIME = 975; // 125 * 7.8us; 33 | 34 | // cross clock domain 35 | reg [15:0] di; 36 | reg [23:0] ab; 37 | reg rd, wr, ack; 38 | always @(posedge sys_clk) begin 39 | di <= sys_di; 40 | ab <= sys_ab; 41 | rd <= sys_rd; 42 | wr <= sys_wr; 43 | ack <= sys_ack; 44 | end 45 | // wire di = sys_di; 46 | 47 | // dataflow assignments 48 | assign sdr_n_CS_WE_RAS_CAS = sdr_cmd; 49 | assign sdr_ba = (state==CONFIG) ? 2'b00 : ab[10:9]; 50 | assign sdr_db = (state==RWRDY && wr) ? di : 16'hzzzz; 51 | 52 | // controller states & FSM 53 | localparam IDLE = 0, RFRSH1 = 1, RFRSH2 = 2, CONFIG = 3, RDWR = 4, RWRDY = 5, ACKWT = 6, WAIT = 7; 54 | 55 | reg [3:0] sdr_cmd; // command issued to sdram 56 | reg [2:0] state = IDLE; // FSM state 57 | reg [2:0] next; // new state after waiting 58 | reg [15:0] ctr = 0; // refresh/init counter 59 | reg [6:0] dly; // FSM delay counter 60 | reg [3:0] open; // open bank flags 61 | reg [12:0] opnrow[0:3]; // open row numbers 62 | 63 | // convenience signals 64 | wire init = sdr_dqm[0]; // note: DQM==2'b11 during init, 2b'00 thereafter 65 | wire [8:0] col = ab[8:0]; 66 | wire [12:0] row = ab[23:11]; 67 | wire [2:0] ba = ab[10:9]; 68 | 69 | // SDRAM command codes 70 | localparam NOP = 4'b1000, PRECHRG = 4'b0001, AUTORFRSH = 4'b0100, MODESET = 4'b0000, 71 | READ = 4'b0110, WRITE = 4'b0010, ACTIVATE = 4'b0101; 72 | 73 | // SDRAM mode register: single word reads & writes, CL=3, sequential access 74 | localparam MODE = 13'b000_1_00_011_0_000; 75 | 76 | always @(posedge sys_clk) 77 | begin 78 | ctr <= ctr + 1; 79 | dly <= dly - 1; 80 | 81 | // default FSM operations 82 | sdr_cmd <= NOP; 83 | state <= WAIT; 84 | 85 | case (state) 86 | 87 | IDLE: 88 | if (init) state <= (ctr==INITLEN) ? RFRSH1 : IDLE; // remain in idle for >100us at init 89 | else begin 90 | sys_rdy <= 1'b0; 91 | if (ctr>=RFTIME) state <= RFRSH1; // as needed, refresh a row 92 | else if (rd|wr) state <= RDWR; // else respond to rd or wr request 93 | else begin 94 | sys_rdy <= 1'b1; 95 | state <= IDLE; // else do nothing 96 | end 97 | end 98 | 99 | RFRSH1: begin // prior to refresh, close all banks 100 | sdr_cmd <= PRECHRG; 101 | sdr_ab[10] <= 1'b1; // do all banks 102 | open <= 4'b000; 103 | dly <= tRP-2; next <= (init) ? CONFIG : RFRSH2; // insert config step during init 104 | end 105 | 106 | RFRSH2: begin // refresh one row, then return to IDLE 107 | sdr_cmd <= AUTORFRSH; 108 | sdr_dqm <= 2'b00; // end init 109 | ctr <= 0; // reset timeout 110 | dly <= tRC-2; next <= (init) ? RFRSH2 : IDLE; // repeat AUTORFRSH during init 111 | end 112 | 113 | CONFIG: begin // load the mode register 114 | sdr_cmd <= MODESET; 115 | sdr_ab <= MODE; 116 | dly <= tMRD-2; next <= RFRSH2; 117 | end 118 | 119 | RDWR: begin // issue read or write command 120 | if (!open[ba]) begin // if not open, open row first & restart 121 | sdr_cmd <= ACTIVATE; 122 | sdr_ab <= row; 123 | open[ba] <= 1'b1; // mark as open 124 | opnrow[ba] <= row; 125 | dly <= tRCD-2; next <= RDWR; 126 | end 127 | else if (opnrow[ba]!=row) begin // if wrong row, close it & restart 128 | sdr_cmd <= PRECHRG; 129 | sdr_ab[10] <= 1'b0; // for one bank only 130 | open[ba] <= 1'b0; // mark as closed 131 | dly <= tRP-2; next <= RDWR; 132 | end 133 | else begin // all good, issue R/W command 134 | sdr_cmd <= (rd) ? READ : WRITE; 135 | sdr_ab <= {4'b0000, col}; 136 | dly <= CL-1; next <= RWRDY; 137 | if (wr) state <= RWRDY; // no delay needed for writes 138 | end 139 | end 140 | 141 | RWRDY: begin // latch read result, or write data 142 | sys_rdy <= 1'b1; 143 | if (rd) sys_do <= sdr_db; 144 | state <= ACKWT; 145 | end 146 | 147 | ACKWT: // wait for system to end current r/w cycle 148 | state <= (ack) ? IDLE : ACKWT; 149 | 150 | WAIT: // wait 'dly' clocks before progressing to state 'next' 151 | state <= (|dly) ? WAIT : next; 152 | 153 | endcase 154 | end 155 | 156 | endmodule 157 | -------------------------------------------------------------------------------- /projects/rv32i/processor/Processor.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import SpecialFIFOs::*; 4 | 5 | 6 | import RFile::*; 7 | import Defines::*; 8 | import Decode::*; 9 | import Execute::*; 10 | 11 | import Scoreboard::*; 12 | import BranchPredictor::*; 13 | 14 | typedef struct { 15 | Word pc; 16 | } F2D deriving(Bits, Eq); 17 | 18 | typedef struct { 19 | Word pc; 20 | DecodedInst dInst; 21 | Word rVal1; 22 | Word rVal2; 23 | } D2E deriving(Bits, Eq); 24 | 25 | typedef struct { 26 | Word pc; 27 | RIndx dst; 28 | 29 | Bool isMem; 30 | 31 | Word data; 32 | Bool extendSigned; 33 | SizeType size; 34 | } E2M deriving(Bits,Eq); 35 | 36 | interface ProcessorIfc; 37 | method ActionValue#(MemReq32) iMemReq; 38 | method Action iMemResp(Word data); 39 | method ActionValue#(MemReq32) dMemReq; 40 | method Action dMemResp(Word data); 41 | endinterface 42 | 43 | (* synthesize *) 44 | module mkProcessor(ProcessorIfc); 45 | Reg#(Word) pc <- mkReg(0); 46 | RFile2R1W rf <- mkRFile2R1W; 47 | 48 | BranchPredictorIfc branch_predictor <- mkBranchPredictor; 49 | 50 | Reg#(ProcStage) stage <- mkReg(Fetch); 51 | 52 | FIFOF#(F2D) f2d <- mkSizedFIFOF(2); 53 | FIFOF#(D2E) d2e <- mkSizedFIFOF(2); 54 | FIFOF#(E2M) e2m <- mkSizedFIFOF(2); 55 | 56 | FIFO#(MemReq32) imemReqQ <- mkFIFO; 57 | FIFO#(Word) imemRespQ <- mkFIFO; 58 | FIFO#(MemReq32) dmemReqQ <- mkFIFO; 59 | FIFO#(Word) dmemRespQ <- mkFIFO; 60 | 61 | 62 | Reg#(Bit#(32)) cycles <- mkReg(0); 63 | Reg#(Bit#(32)) fetchCnt <- mkReg(0); 64 | Reg#(Bit#(32)) execCnt <- mkReg(0); 65 | rule incCycle; 66 | cycles <= cycles + 1; 67 | endrule 68 | 69 | rule doFetch (stage == Fetch); 70 | //Word curpc = branch_predictor.getNextPc(pc); 71 | Word curpc = pc; 72 | 73 | imemReqQ.enq(MemReq32{write:False,addr:truncate(pc),word:?,bytes:3}); 74 | f2d.enq(F2D {pc: curpc}); 75 | 76 | $write( "[0x%8x:0x%4x] Fetching instruction count 0x%4x\n", cycles, curpc, fetchCnt ); 77 | fetchCnt <= fetchCnt + 1; 78 | stage <= Decode; 79 | endrule 80 | 81 | 82 | 83 | 84 | 85 | rule doDecode (stage == Decode); 86 | let x = f2d.first; 87 | f2d.deq; 88 | Word inst = imemRespQ.first; 89 | imemRespQ.deq; 90 | 91 | let dInst = decode(inst); 92 | let rVal1 = rf.rd1(dInst.src1); 93 | let rVal2 = rf.rd2(dInst.src2); 94 | 95 | d2e.enq(D2E {pc: x.pc, dInst: dInst, rVal1: rVal1, rVal2: rVal2}); 96 | 97 | $write( "[0x%8x:0x%04x] decoding 0x%08x\n", cycles, x.pc, inst ); 98 | stage <= Execute; 99 | endrule 100 | 101 | 102 | 103 | 104 | 105 | 106 | rule doExecute (stage == Execute); 107 | D2E x = d2e.first; 108 | d2e.deq; 109 | Word curpc = x.pc; 110 | Word rVal1 = x.rVal1; Word rVal2 = x.rVal2; 111 | DecodedInst dInst = x.dInst; 112 | 113 | let eInst = exec(dInst, rVal1, rVal2, curpc); 114 | 115 | pc <= eInst.nextPC; 116 | branch_predictor.setPrediction(curpc, eInst.nextPC); 117 | 118 | execCnt <= execCnt + 1; 119 | $write( "[0x%8x:0x%04x] Executing\n", cycles, curpc ); 120 | 121 | if (eInst.iType == Unsupported) begin 122 | $display("Reached unsupported instruction"); 123 | $display("Total Clock Cycles = %d\nTotal Instruction Count = %d", cycles, execCnt); 124 | $display("Dumping the state of the processor"); 125 | $display("pc = 0x%x", x.pc); 126 | //rf.displayRFileInSimulation; 127 | $display("Quitting simulation."); 128 | $finish; 129 | end 130 | 131 | if (eInst.iType == LOAD) begin 132 | dmemReqQ.enq(MemReq32{write:False,addr:truncate(eInst.addr), word:?, bytes:dInst.size}); 133 | e2m.enq(E2M{dst:eInst.dst,extendSigned:dInst.extendSigned,size:dInst.size, pc:curpc, data:0, isMem: True}); 134 | stage <= Writeback; 135 | $write( "[0x%8x:0x%04x] \t\t Mem read from 0x%08x\n", cycles, curpc, eInst.addr ); 136 | end 137 | else if (eInst.iType == STORE) begin 138 | dmemReqQ.enq(MemReq32{write:True,addr:truncate(eInst.addr), word:eInst.data, bytes:dInst.size}); 139 | $write( "[0x%8x:0x%04x] \t\t Mem write 0x%08x to 0x%08x\n", cycles, curpc, eInst.data, eInst.addr ); 140 | //e2m.enq(E2M{dst:0,extendSigned:?,size:?, pc:curpc, data:?, isMem: False}); 141 | stage <= Fetch; 142 | end 143 | else begin 144 | if(eInst.writeDst) begin 145 | e2m.enq(E2M{dst:eInst.dst,extendSigned:?,size:?, pc:curpc, data:eInst.data, isMem: False}); 146 | stage <= Writeback; 147 | end else begin 148 | stage <= Fetch; 149 | //e2m.enq(E2M{dst:0,extendSigned:?,size:?, pc:curpc, data:?, isMem: False}); 150 | end 151 | end 152 | endrule 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | rule doWriteback (stage == Writeback); 161 | e2m.deq; 162 | let r = e2m.first; 163 | 164 | Word dw = r.data; 165 | if ( r.isMem ) begin 166 | dmemRespQ.deq; 167 | let data = dmemRespQ.first; 168 | 169 | if ( r.size == 0 ) begin 170 | if ( r.extendSigned ) begin 171 | Int#(8) id = unpack(data[7:0]); 172 | Int#(32) ide = signExtend(id); 173 | dw = pack(ide); 174 | end else begin 175 | dw = zeroExtend(data[7:0]); 176 | end 177 | end else if ( r.size == 1 ) begin 178 | if ( r.extendSigned ) begin 179 | Int#(16) id = unpack(data[15:0]); 180 | Int#(32) ide = signExtend(id); 181 | dw = pack(ide); 182 | end else begin 183 | dw = zeroExtend(data[15:0]); 184 | end 185 | end else begin 186 | dw = data; 187 | end 188 | end 189 | 190 | $write( "[0x%8x:0x%04x] Writeback writing %x to %d\n", cycles, r.pc, dw, r.dst ); 191 | rf.wr(r.dst, dw); 192 | 193 | 194 | stage <= Fetch; 195 | endrule 196 | 197 | 198 | 199 | 200 | 201 | 202 | method ActionValue#(MemReq32) iMemReq; 203 | imemReqQ.deq; 204 | return imemReqQ.first; 205 | endmethod 206 | method Action iMemResp(Word data); 207 | imemRespQ.enq(data); 208 | endmethod 209 | method ActionValue#(MemReq32) dMemReq; 210 | dmemReqQ.deq; 211 | return dmemReqQ.first; 212 | endmethod 213 | method Action dMemResp(Word data); 214 | dmemRespQ.enq(data); 215 | endmethod 216 | endmodule 217 | -------------------------------------------------------------------------------- /projects/nn_fc/NnFc.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Vector::*; 4 | import BRAMFIFO::*; 5 | 6 | import SimpleFloat::*; 7 | import FloatingPoint::*; 8 | 9 | 10 | interface MacPeIfc; 11 | method Action putInput(Float v, Bit#(8) input_idx); 12 | method Action putWeight(Float w); 13 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(8))) resultGet; 14 | method Bool resultExist; 15 | endinterface 16 | 17 | typedef 4 PeWaysLog; 18 | typedef TExp#(PeWaysLog) PeWays; 19 | 20 | Integer inputDim = 1024; 21 | Integer outputDim = 64; 22 | 23 | module mkMacPe#(Bit#(PeWaysLog) peIdx) (MacPeIfc); 24 | Reg#(Bit#(32)) cycleCount <- mkReg(0); 25 | rule incCycleCount; 26 | cycleCount <= cycleCount + 1; 27 | endrule 28 | 29 | FIFO#(Float) weightQ <- mkFIFO; 30 | FIFO#(Tuple2#(Float,Bit#(8))) inputQ <- mkFIFO; 31 | FIFOF#(Tuple3#(Float, Bit#(8), Bit#(8))) outputQ <- mkFIFOF; 32 | 33 | 34 | FloatTwoOp fmult <- mkFloatMult; 35 | FloatTwoOp fadd <- mkFloatAdd; 36 | FIFO#(Float) addForwardQ <- mkSizedFIFO(4); 37 | FIFO#(Tuple3#(Bit#(8),Bit#(8),Bit#(16))) partialSumIdxQ3 <- mkFIFO1; 38 | FIFO#(Tuple3#(Bit#(8),Bit#(8),Bit#(16))) partialSumIdxQ2 <- mkSizedBRAMFIFO(128); 39 | FIFO#(Tuple3#(Bit#(8),Bit#(8),Bit#(16))) partialSumIdxQ1 <- mkFIFO1; 40 | FIFO#(Float) partialSumQ <- mkSizedBRAMFIFO(128); 41 | FIFO#(Float) partialSumQ2 <- mkFIFO; 42 | 43 | Reg#(Bit#(8)) lastInputIdx <- mkReg(0); 44 | Reg#(Bit#(8)) curOutputIdx <- mkReg(zeroExtend(peIdx)); 45 | Reg#(Bit#(12)) curMacIdx <- mkReg(0); 46 | rule enqMac; 47 | inputQ.deq; 48 | Float inf = tpl_1(inputQ.first); 49 | Bit#(8) ini = tpl_2(inputQ.first); 50 | weightQ.deq; 51 | Float wf = weightQ.first; 52 | 53 | partialSumIdxQ1.enq(tuple3(ini,curOutputIdx,zeroExtend(curMacIdx))); 54 | if ( curMacIdx + 1 >= fromInteger(inputDim) ) begin 55 | curMacIdx <= 0; 56 | let nextOutIdx = curOutputIdx + fromInteger(valueOf(PeWays)); 57 | if ( nextOutIdx >= fromInteger(outputDim) ) begin 58 | curOutputIdx <= zeroExtend(peIdx); 59 | end else begin 60 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 61 | end 62 | end else begin 63 | curMacIdx <= curMacIdx + 1; 64 | end 65 | fmult.put(inf, wf); 66 | 67 | if ( curMacIdx == 0 ) begin 68 | addForwardQ.enq(unpack(0)); // float '0' 69 | end else begin 70 | partialSumQ2.deq; 71 | addForwardQ.enq(partialSumQ2.first); 72 | end 73 | endrule 74 | 75 | rule enqAdd; 76 | let mr <- fmult.get; 77 | fadd.put(mr,addForwardQ.first); 78 | addForwardQ.deq; 79 | endrule 80 | 81 | 82 | rule relayMacResult; 83 | let d <- fadd.get; 84 | partialSumQ.enq(d); 85 | endrule 86 | 87 | rule relayPartialIdx; 88 | partialSumIdxQ1.deq; 89 | partialSumIdxQ2.enq(partialSumIdxQ1.first); 90 | endrule 91 | rule relayPartialIdx2; 92 | partialSumIdxQ2.deq; 93 | partialSumIdxQ3.enq(partialSumIdxQ2.first); 94 | endrule 95 | rule filterDoneResults; 96 | partialSumIdxQ3.deq; 97 | let psi = partialSumIdxQ3.first; 98 | 99 | partialSumQ.deq; 100 | let ps = partialSumQ.first; 101 | if (tpl_3(psi)+1 == fromInteger(inputDim) ) begin 102 | outputQ.enq(tuple3(ps, tpl_1(psi), tpl_2(psi))); 103 | //$write( "Row done %d %d\n", tpl_1(psi), tpl_2(psi) ); 104 | end else begin 105 | partialSumQ2.enq(ps); 106 | end 107 | endrule 108 | 109 | 110 | 111 | FIFO#(Float) weightInQ <- mkFIFO; 112 | rule relayWeightIn; 113 | weightInQ.deq; 114 | weightQ.enq(weightInQ.first); 115 | endrule 116 | method Action putInput(Float v, Bit#(8) input_idx); 117 | inputQ.enq(tuple2(v,input_idx)); 118 | endmethod 119 | method Action putWeight(Float w); 120 | weightInQ.enq(w); 121 | endmethod 122 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(8))) resultGet; 123 | outputQ.deq; 124 | return outputQ.first; 125 | endmethod 126 | method Bool resultExist; 127 | return outputQ.notEmpty; 128 | endmethod 129 | endmodule 130 | 131 | 132 | 133 | interface NnFcIfc; 134 | method Action dataIn(Float value, Bit#(8) input_idx); 135 | method Action weightIn(Float weight); 136 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(8))) dataOut; 137 | endinterface 138 | 139 | (* synthesize *) 140 | module mkNnFc(NnFcIfc); 141 | Vector#(PeWays, MacPeIfc) pes; 142 | Vector#(PeWays, FIFO#(Float)) weightInQs <- replicateM(mkFIFO1); 143 | Vector#(PeWays, FIFO#(Tuple2#(Float,Bit#(8)))) dataInQs <- replicateM(mkFIFO1); 144 | Vector#(PeWays, FIFO#(Tuple3#(Float,Bit#(8),Bit#(8)))) resultOutQs <- replicateM(mkFIFO1); 145 | 146 | for (Integer i = 0; i < valueOf(PeWays); i=i+1 ) begin 147 | pes[i] <- mkMacPe(fromInteger(i)); 148 | 149 | Reg#(Bit#(16)) weightInIdx <- mkReg(0); 150 | rule forwardWeights; 151 | weightInQs[i].deq; 152 | let w = weightInQs[i].first; 153 | if ( i < valueOf(PeWays)-1 ) begin 154 | weightInQs[i+1].enq(w); 155 | end 156 | 157 | weightInIdx <= weightInIdx + 1; 158 | Bit#(PeWaysLog) target = truncate(weightInIdx); 159 | if ( target == fromInteger(i) ) begin 160 | //$write( "Weight in %d\n", target ); 161 | pes[i].putWeight(w); 162 | end 163 | endrule 164 | rule forwardInput; 165 | dataInQs[i].deq; 166 | let d = dataInQs[i].first; 167 | if ( i < valueOf(PeWays)-1 ) begin 168 | dataInQs[i+1].enq(d); 169 | end 170 | pes[i].putInput(tpl_1(d), tpl_2(d)); 171 | endrule 172 | rule forwardResult; 173 | if ( pes[i].resultExist ) begin 174 | let d <- pes[i].resultGet; 175 | resultOutQs[i].enq(d); 176 | end else if ( i < valueOf(PeWays)-1 ) begin 177 | resultOutQs[i+1].deq; 178 | resultOutQs[i].enq(resultOutQs[i+1].first); 179 | end 180 | endrule 181 | end 182 | 183 | 184 | method Action dataIn(Float value, Bit#(8) input_idx); 185 | dataInQs[0].enq(tuple2(value,input_idx)); 186 | endmethod 187 | method Action weightIn(Float weight); 188 | weightInQs[0].enq(weight); 189 | //$write( "Received weight %x\n", weight ); 190 | endmethod 191 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(8))) dataOut; 192 | resultOutQs[0].deq; 193 | return resultOutQs[0].first; 194 | endmethod 195 | endmodule 196 | -------------------------------------------------------------------------------- /src/bsv_verilog/FIFO2.v: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2000-2012 Bluespec, Inc. 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and this permission notice shall be included in 12 | // all copies or substantial portions of the Software. 13 | 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | // THE SOFTWARE. 21 | // 22 | // $Revision: 29755 $ 23 | // $Date: 2012-10-22 13:58:12 +0000 (Mon, 22 Oct 2012) $ 24 | 25 | `ifdef BSV_ASSIGNMENT_DELAY 26 | `else 27 | `define BSV_ASSIGNMENT_DELAY 28 | `endif 29 | 30 | `ifdef BSV_POSITIVE_RESET 31 | `define BSV_RESET_VALUE 1'b1 32 | `define BSV_RESET_EDGE posedge 33 | `else 34 | `define BSV_RESET_VALUE 1'b0 35 | `define BSV_RESET_EDGE negedge 36 | `endif 37 | 38 | `ifdef BSV_ASYNC_RESET 39 | `define BSV_ARESET_EDGE_META or `BSV_RESET_EDGE RST 40 | `else 41 | `define BSV_ARESET_EDGE_META 42 | `endif 43 | 44 | `ifdef BSV_RESET_FIFO_HEAD 45 | `define BSV_ARESET_EDGE_HEAD `BSV_ARESET_EDGE_META 46 | `else 47 | `define BSV_ARESET_EDGE_HEAD 48 | `endif 49 | 50 | // Depth 2 FIFO 51 | module FIFO2(CLK, 52 | RST, 53 | D_IN, 54 | ENQ, 55 | FULL_N, 56 | D_OUT, 57 | DEQ, 58 | EMPTY_N, 59 | CLR); 60 | 61 | parameter width = 1; 62 | parameter guarded = 1; 63 | 64 | input CLK ; 65 | input RST ; 66 | input [width - 1 : 0] D_IN; 67 | input ENQ; 68 | input DEQ; 69 | input CLR ; 70 | 71 | output FULL_N; 72 | output EMPTY_N; 73 | output [width - 1 : 0] D_OUT; 74 | 75 | reg full_reg; 76 | reg empty_reg; 77 | reg [width - 1 : 0] data0_reg; 78 | reg [width - 1 : 0] data1_reg; 79 | 80 | assign FULL_N = full_reg ; 81 | assign EMPTY_N = empty_reg ; 82 | assign D_OUT = data0_reg ; 83 | 84 | 85 | // Optimize the loading logic since state encoding is not power of 2! 86 | wire d0di = (ENQ && ! empty_reg ) || ( ENQ && DEQ && full_reg ) ; 87 | wire d0d1 = DEQ && ! full_reg ; 88 | wire d0h = ((! DEQ) && (! ENQ )) || (!DEQ && empty_reg ) || ( ! ENQ &&full_reg) ; 89 | wire d1di = ENQ & empty_reg ; 90 | 91 | `ifdef BSV_NO_INITIAL_BLOCKS 92 | `else // not BSV_NO_INITIAL_BLOCKS 93 | // synopsys translate_off 94 | initial 95 | begin 96 | data0_reg = {((width + 1)/2) {2'b10}} ; 97 | data1_reg = {((width + 1)/2) {2'b10}} ; 98 | empty_reg = 1'b0; 99 | full_reg = 1'b1; 100 | end // initial begin 101 | // synopsys translate_on 102 | `endif // BSV_NO_INITIAL_BLOCKS 103 | 104 | always@(posedge CLK `BSV_ARESET_EDGE_META) 105 | begin 106 | if (RST == `BSV_RESET_VALUE) 107 | begin 108 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b0; 109 | full_reg <= `BSV_ASSIGNMENT_DELAY 1'b1; 110 | end // if (RST == `BSV_RESET_VALUE) 111 | else 112 | begin 113 | if (CLR) 114 | begin 115 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b0; 116 | full_reg <= `BSV_ASSIGNMENT_DELAY 1'b1; 117 | end // if (CLR) 118 | else if ( ENQ && ! DEQ ) // just enq 119 | begin 120 | empty_reg <= `BSV_ASSIGNMENT_DELAY 1'b1; 121 | full_reg <= `BSV_ASSIGNMENT_DELAY ! empty_reg ; 122 | end 123 | else if ( DEQ && ! ENQ ) 124 | begin 125 | full_reg <= `BSV_ASSIGNMENT_DELAY 1'b1; 126 | empty_reg <= `BSV_ASSIGNMENT_DELAY ! full_reg; 127 | end // if ( DEQ && ! ENQ ) 128 | end // else: !if(RST == `BSV_RESET_VALUE) 129 | 130 | end // always@ (posedge CLK or `BSV_RESET_EDGE RST) 131 | 132 | 133 | always@(posedge CLK `BSV_ARESET_EDGE_HEAD) 134 | begin 135 | `ifdef BSV_RESET_FIFO_HEAD 136 | if (RST == `BSV_RESET_VALUE) 137 | begin 138 | data0_reg <= `BSV_ASSIGNMENT_DELAY {width {1'b0}} ; 139 | data1_reg <= `BSV_ASSIGNMENT_DELAY {width {1'b0}} ; 140 | end 141 | else 142 | `endif 143 | begin 144 | data0_reg <= `BSV_ASSIGNMENT_DELAY 145 | {width{d0di}} & D_IN | {width{d0d1}} & data1_reg | {width{d0h}} & data0_reg ; 146 | data1_reg <= `BSV_ASSIGNMENT_DELAY 147 | d1di ? D_IN : data1_reg ; 148 | end // else: !if(RST == `BSV_RESET_VALUE) 149 | end // always@ (posedge CLK or `BSV_RESET_EDGE RST) 150 | 151 | 152 | 153 | // synopsys translate_off 154 | always@(posedge CLK) 155 | begin: error_checks 156 | reg deqerror, enqerror ; 157 | 158 | deqerror = 0; 159 | enqerror = 0; 160 | if (RST == ! `BSV_RESET_VALUE) 161 | begin 162 | if ( ! empty_reg && DEQ ) 163 | begin 164 | deqerror = 1; 165 | $display( "Warning: FIFO2: %m -- Dequeuing from empty fifo" ) ; 166 | end 167 | if ( ! full_reg && ENQ && (!DEQ || guarded) ) 168 | begin 169 | enqerror = 1; 170 | $display( "Warning: FIFO2: %m -- Enqueuing to a full fifo" ) ; 171 | end 172 | end 173 | end // always@ (posedge CLK) 174 | // synopsys translate_on 175 | 176 | endmodule 177 | -------------------------------------------------------------------------------- /projects/matrix4x4/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import BRAM::*; 5 | import BRAMFIFO::*; 6 | import Uart::*; 7 | import Sdram::*; 8 | 9 | import SimpleFloat::*; 10 | import FloatingPoint::*; 11 | 12 | interface HwMainIfc; 13 | method ActionValue#(Bit#(8)) serial_tx; 14 | method Action serial_rx(Bit#(8) rx); 15 | endinterface 16 | 17 | module mkHwMain#(Ulx3sSdramUserIfc mem) (HwMainIfc); 18 | Clock curclk <- exposeCurrentClock; 19 | Reset currst <- exposeCurrentReset; 20 | 21 | Reg#(Bit#(32)) cycles <- mkReg(0); 22 | rule incCyclecount; 23 | cycles <= cycles + 1; 24 | endrule 25 | 26 | Reg#(Bit#(32)) processingStartCycle <- mkReg(0); 27 | 28 | Reg#(Bit#(5)) inputEnqueued <- mkReg(0); 29 | 30 | FIFO#(Bit#(32)) inputAQ <- mkSizedBRAMFIFO(32); 31 | FIFO#(Bit#(32)) inputBQ <- mkSizedBRAMFIFO(32); 32 | FIFO#(Bit#(32)) outputQ <- mkSizedBRAMFIFO(32); 33 | 34 | Reg#(Vector#(4,Vector#(4,Float))) matrixA <- mkReg(replicate(replicate(0))); 35 | Reg#(Vector#(4,Vector#(4,Float))) matrixB <- mkReg(replicate(replicate(0))); 36 | Reg#(Vector#(4,Vector#(4,Float))) matrixC <- mkReg(replicate(replicate(0))); 37 | 38 | FloatTwoOp fmult <- mkFloatMult; 39 | FloatTwoOp fadd <- mkFloatAdd; 40 | 41 | Reg#(Bit#(5)) loadMatrixCnt <- mkReg(0); 42 | Reg#(Bit#(2)) loadMatrixCol <- mkReg(0); 43 | Reg#(Bit#(2)) loadMatrixRow <- mkReg(0); 44 | rule loadMatrix ( loadMatrixCnt < 16 ); 45 | inputAQ.deq; 46 | inputBQ.deq; 47 | matrixA[loadMatrixRow][loadMatrixCol] <= unpack(inputAQ.first); 48 | matrixB[loadMatrixRow][loadMatrixCol] <= unpack(inputBQ.first); 49 | loadMatrixCnt <= loadMatrixCnt + 1; 50 | loadMatrixCol <= loadMatrixCol + 1; 51 | if ( loadMatrixCol == 3 ) begin 52 | loadMatrixRow <= loadMatrixRow + 1; 53 | end 54 | endrule 55 | 56 | Reg#(Bit#(2)) procMatrixI <- mkReg(0); 57 | Reg#(Bit#(2)) procMatrixJ <- mkReg(0); 58 | Reg#(Bit#(2)) procMatrixK <- mkReg(0); 59 | Reg#(Bit#(8)) fMultReqCnt <- mkReg(0); 60 | rule procMatrixMult ( loadMatrixCnt == 16 ); 61 | Float av = matrixA[procMatrixI][procMatrixK]; 62 | Float bv = matrixB[procMatrixJ][procMatrixK]; 63 | 64 | fmult.put(av,bv); 65 | fMultReqCnt <= fMultReqCnt + 1; 66 | //$write( ">> %d %d %d ---- %x %x -- %d\n", procMatrixI, procMatrixJ, procMatrixK, av, bv, fMultReqCnt); 67 | 68 | procMatrixK <= procMatrixK + 1; 69 | if ( procMatrixK == 3 ) begin 70 | procMatrixJ <= procMatrixJ + 1; 71 | if ( procMatrixJ == 3 ) begin 72 | procMatrixI <= procMatrixI + 1; 73 | if ( procMatrixI == 3 ) begin 74 | loadMatrixCnt <= 0; 75 | //$write("Proc done!!!!!\n"); 76 | end 77 | end 78 | end 79 | endrule 80 | 81 | FIFO#(Float) fMultQ <- mkFIFO; 82 | Reg#(Bit#(8)) fMultCnt <- mkReg(0); 83 | rule relayMultRes; 84 | let v <- fmult.get; 85 | fMultQ.enq(v); 86 | fMultCnt <= fMultCnt + 1; 87 | endrule 88 | 89 | Reg#(Bit#(2)) accumulateMatrixI <- mkReg(0); 90 | Reg#(Bit#(2)) accumulateMatrixJ <- mkReg(0); 91 | Reg#(Bit#(2)) accumulateMatrixK <- mkReg(0); 92 | FIFO#(Tuple3#(Bit#(2),Bit#(2), Bool)) matrixAccumulateDestQ <- mkFIFO; 93 | Reg#(Bool) accumulateInflight <- mkReg(False); 94 | Reg#(Bool) accumulatorWaitForFlush <- mkReg(False); 95 | Reg#(Bit#(8)) accumulateCnt <- mkReg(0); 96 | rule procMatrixAdd ( !accumulateInflight && !accumulatorWaitForFlush ); 97 | //let mv <- fmult.get; 98 | let mv = fMultQ.first; 99 | fMultQ.deq; 100 | 101 | let cv = matrixC[accumulateMatrixI][accumulateMatrixJ]; 102 | 103 | if ( accumulateMatrixK == 0 ) begin 104 | fadd.put(mv,0); 105 | end else begin 106 | fadd.put(mv,cv); 107 | end 108 | accumulateInflight <= True; 109 | 110 | Bool islast = False; 111 | accumulateCnt <= accumulateCnt + 1; 112 | 113 | accumulateMatrixK <= accumulateMatrixK + 1; 114 | if ( accumulateMatrixK == 3 ) begin 115 | accumulateMatrixJ <= accumulateMatrixJ + 1; 116 | if ( accumulateMatrixJ == 3 ) begin 117 | accumulateMatrixI <= accumulateMatrixI + 1; 118 | 119 | if ( accumulateMatrixI == 3 ) begin 120 | accumulatorWaitForFlush <= True; 121 | islast = True; 122 | $write( "Acceleration done! %d cycles\n", cycles - processingStartCycle ); 123 | end 124 | end 125 | end 126 | 127 | //$write( "<< %d %d %d -- %d %x\n", accumulateMatrixI, accumulateMatrixJ, accumulateMatrixK, accumulateCnt, mv); 128 | 129 | matrixAccumulateDestQ.enq(tuple3(accumulateMatrixI,accumulateMatrixJ, islast)); 130 | endrule 131 | 132 | 133 | Reg#(Bit#(8)) accumulateResCnt <- mkReg(0); 134 | Reg#(Bool) startOutputFlush <- mkReg(False); 135 | rule recvMatrixAdd (accumulateInflight); 136 | let av <- fadd.get; 137 | //Float av = unpack(32'h3ecccccd); 138 | let dst = matrixAccumulateDestQ.first; 139 | matrixAccumulateDestQ.deq; 140 | 141 | if ( tpl_3(dst) ) begin 142 | startOutputFlush <= True; 143 | end 144 | 145 | matrixC[tpl_1(dst)][tpl_2(dst)] <= av; 146 | 147 | accumulateInflight <= False; 148 | accumulateResCnt <= accumulateResCnt + 1; 149 | 150 | //$write( "++ %d %d %s -- %d %x\n", tpl_1(dst), tpl_2(dst), tpl_3(dst)?"True":"False", accumulateResCnt, pack(av)); 151 | endrule 152 | 153 | 154 | FIFO#(Bit#(32)) flushOutQ <- mkFIFO; 155 | Reg#(Bit#(2)) flushOutputMatrixI <- mkReg(0); 156 | Reg#(Bit#(2)) flushOutputMatrixJ <- mkReg(0); 157 | rule flushOutput ( startOutputFlush ); 158 | outputQ.enq(pack(matrixC[flushOutputMatrixI][flushOutputMatrixJ])); 159 | 160 | flushOutputMatrixJ <= flushOutputMatrixJ + 1; 161 | if ( flushOutputMatrixJ == 3 ) begin 162 | flushOutputMatrixI <= flushOutputMatrixI + 1; 163 | if ( flushOutputMatrixI == 3 ) begin 164 | startOutputFlush <= False; 165 | accumulatorWaitForFlush <= False; 166 | end 167 | end 168 | endrule 169 | 170 | 171 | 172 | 173 | 174 | 175 | Reg#(Vector#(4,Bit#(8))) outputDeSerializer <- mkReg(?); 176 | Reg#(Bit#(2)) outputDeSerializerIdx <- mkReg(0); 177 | 178 | Reg#(Vector#(4,Bit#(8))) inputDeSerializer <- mkReg(?); 179 | Reg#(Bit#(2)) inputDeSerializerIdx <- mkReg(0); 180 | 181 | method ActionValue#(Bit#(8)) serial_tx; 182 | Bit#(8) ret = 0; 183 | if ( outputDeSerializerIdx == 0 ) begin 184 | outputQ.deq; 185 | Vector#(4,Bit#(8)) ser_value = unpack(outputQ.first); 186 | 187 | outputDeSerializer <= ser_value; 188 | ret = ser_value[0]; 189 | end else begin 190 | ret = outputDeSerializer[outputDeSerializerIdx]; 191 | end 192 | outputDeSerializerIdx <= outputDeSerializerIdx + 1; 193 | return ret; 194 | endmethod 195 | method Action serial_rx(Bit#(8) d); 196 | Vector#(4,Bit#(8)) des_value = inputDeSerializer; 197 | des_value[inputDeSerializerIdx] = d; 198 | inputDeSerializerIdx <= inputDeSerializerIdx + 1; 199 | inputDeSerializer <= des_value; 200 | 201 | 202 | if (inputDeSerializerIdx == 3 ) begin 203 | // How is input being split to A and B correctly, even when there is more than 32 inputs? 204 | if ( inputEnqueued < 16 ) begin 205 | inputAQ.enq(pack(des_value)); 206 | end else begin 207 | inputBQ.enq(pack(des_value)); 208 | end 209 | inputEnqueued <= inputEnqueued + 1; 210 | 211 | if (inputEnqueued == 31 ) begin 212 | processingStartCycle <= cycles; 213 | end 214 | end 215 | endmethod 216 | endmodule 217 | -------------------------------------------------------------------------------- /src/SimpleFloat.bsv: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // Sang-Woo Jun, 2021 3 | //////////////////////////////////////////////////////////////////////////////// 4 | // Implements simplified floating point operations for Lattice ECP5 5 | // Fractions are truncated to most significant 18 bits, 6 | // for efficiency with the 18x18D multipliers on the ECP5 7 | // Subnormal numbers are ignored 8 | // NaN, Inf, etc are all ignored 9 | //////////////////////////////////////////////////////////////////////////////// 10 | 11 | 12 | 13 | 14 | package SimpleFloat; 15 | 16 | import FIFO::*; 17 | import FloatingPoint::*; 18 | import Mult18x18D::*; 19 | 20 | interface FloatTwoOp; 21 | method Action put(Float a, Float b); 22 | method ActionValue#(Float) get; 23 | endinterface 24 | 25 | 26 | 27 | module mkFloatMult(FloatTwoOp); 28 | FIFO#(Bit#(32)) outQ <- mkFIFO; 29 | 30 | `ifdef BSIM 31 | FIFO#(Float) bsimOutQ <- mkFIFO; 32 | rule bsimRelay; 33 | bsimOutQ.deq; 34 | outQ.enq(pack(bsimOutQ.first)); 35 | endrule 36 | `else 37 | Mult18x18DIfc mult18 <- mkMult18x18D; 38 | FIFO#(Tuple3#(Bit#(1),Bit#(9),Bool)) signExpZeroQ <- mkSizedFIFO(6); 39 | rule procMultResult; 40 | Bit#(36) mres <- mult18.get; 41 | signExpZeroQ.deq; 42 | Bit#(1) sign = tpl_1(signExpZeroQ.first); 43 | Bit#(9) expsum = tpl_2(signExpZeroQ.first); 44 | Bool isZero = tpl_3(signExpZeroQ.first); 45 | Bit#(18) newfrac; 46 | Bit#(8) newexp; 47 | if ( mres[35] != 0 ) begin 48 | newfrac = truncate(mres>>17); 49 | newexp = truncate(expsum-126); 50 | end else begin 51 | newfrac = truncate(mres>>16); 52 | newexp = truncate(expsum-127); 53 | end 54 | 55 | 56 | if ( isZero ) outQ.enq(0); 57 | else outQ.enq({sign,newexp,newfrac,0}); 58 | endrule 59 | `endif 60 | 61 | method Action put(Float a, Float b); 62 | `ifdef BSIM 63 | let r = multFP(a,b, defaultValue); 64 | bsimOutQ.enq(tpl_1(r)); 65 | `else 66 | Bit#(32) bina = pack(a); 67 | Bit#(32) binb = pack(b); 68 | Bit#(18) fraca = zeroExtend(bina[22:6])|(1<<17); 69 | Bit#(18) fracb = zeroExtend(binb[22:6])|(1<<17); 70 | mult18.put(fraca, fracb); 71 | Bit#(1) newsign = bina[31] ^ binb[31]; 72 | Bit#(8) expa = bina[30:23]; 73 | Bit#(8) expb = binb[30:23]; 74 | Bool isZero = (expa==0)||(expb==0); 75 | Bit#(9) expsum = zeroExtend(bina[30:23])+zeroExtend(binb[30:23]); 76 | signExpZeroQ.enq(tuple3(newsign,expsum,isZero)); 77 | `endif 78 | endmethod 79 | method ActionValue#(Float) get; 80 | outQ.deq; 81 | return unpack(outQ.first); 82 | endmethod 83 | endmodule 84 | 85 | typedef struct {Bit#(18) frac;Bit#(8) expo;Bit#(1) sign;} FloatParts deriving (Bits,Eq); 86 | 87 | module mkFloatAdd(FloatTwoOp); 88 | FIFO#(Tuple2#(Bit#(32),Bit#(32))) inQ <- mkFIFO; 89 | FIFO#(Tuple4#(Bool,Bit#(8),FloatParts,FloatParts)) inProcQ <- mkFIFO; 90 | FIFO#(Tuple3#(Bool,FloatParts,FloatParts)) calcQ <- mkFIFO; 91 | FIFO#(Tuple3#(Bit#(1),Bit#(8),Bit#(19))) normalizeQ <- mkFIFO; 92 | FIFO#(Tuple3#(Bit#(1),Bit#(8),Bit#(19))) normalizeQ1 <- mkFIFO; 93 | FIFO#(Tuple3#(Bit#(1),Bit#(8),Bit#(19))) normalizeQ2 <- mkFIFO; 94 | FIFO#(Tuple3#(Bit#(1),Bit#(8),Bit#(19))) normalizeQ3 <- mkFIFO; 95 | FIFO#(Tuple3#(Bit#(1),Bit#(8),Bit#(19))) normalizeQ4 <- mkFIFO; 96 | FIFO#(Bit#(32)) outQ <- mkFIFO; 97 | 98 | rule procIn; 99 | inQ.deq; 100 | let d_ = inQ.first; 101 | let bina = tpl_1(d_); 102 | let binb = tpl_2(d_); 103 | 104 | Bit#(18) fraca = zeroExtend(bina[22:6])|(1<<17); 105 | Bit#(18) fracb = zeroExtend(binb[22:6])|(1<<17); 106 | Bit#(8) expa = bina[30:23]; 107 | Bit#(8) expb = binb[30:23]; 108 | Bit#(1) signa = bina[31]; 109 | Bit#(1) signb = binb[31]; 110 | 111 | Bool alarger = ((expa>expb) || (expa==expb&&fraca>fracb)); 112 | Bit#(8) expdiff = (expa>expb)?(expa-expb):(expb-expa); 113 | //Bit#(5) expshift = truncate(expdiff); 114 | //if ( expdiff > 18 ) expshift = 5'b11111; 115 | inProcQ.enq(tuple4(alarger,expdiff, 116 | FloatParts{frac:fraca, expo:expa, sign:signa}, 117 | FloatParts{frac:fracb, expo:expb, sign:signb} 118 | )); 119 | 120 | endrule 121 | rule calcFrac; 122 | inProcQ.deq; 123 | let d_ = inProcQ.first; 124 | Bool alarger = tpl_1(d_); 125 | Bit#(8) expshift = tpl_2(d_); 126 | let ta = (tpl_3(d_)); 127 | let tb = (tpl_4(d_)); 128 | let fraca = ta.frac; 129 | let fracb = tb.frac; 130 | let expa = ta.expo; 131 | let expb = tb.expo; 132 | let signa = ta.sign; 133 | let signb = tb.sign; 134 | fraca = (alarger?fraca:(fraca>>expshift)); 135 | fracb = (alarger?(fracb>>expshift):fracb); 136 | calcQ.enq(tuple3(alarger, 137 | FloatParts{frac:fraca, expo:expa, sign:signa}, 138 | FloatParts{frac:fracb, expo:expb, sign:signb} 139 | )); 140 | endrule 141 | rule calcFrac2; 142 | calcQ.deq; 143 | let d_ = calcQ.first; 144 | Bool alarger = tpl_1(d_); 145 | let ta = (tpl_2(d_)); 146 | let tb = (tpl_3(d_)); 147 | let fraca = ta.frac; 148 | let fracb = tb.frac; 149 | let expa = ta.expo; 150 | let expb = tb.expo; 151 | let signa = ta.sign; 152 | let signb = tb.sign; 153 | Bit#(19) newfrac; 154 | Bit#(8) newexp = alarger?expa:expb; 155 | Bit#(1) newsign = alarger?signa:signb; 156 | if ( signa == signb ) begin 157 | newfrac = zeroExtend(fraca)+zeroExtend(fracb); 158 | end else begin 159 | newfrac = (fraca>fracb)?zeroExtend(fraca-fracb):zeroExtend(fracb-fraca); 160 | end 161 | 162 | normalizeQ.enq(tuple3(newsign,newexp,newfrac)); 163 | endrule 164 | 165 | rule normalize; 166 | normalizeQ.deq; 167 | let d_ = normalizeQ.first; 168 | let newsign = tpl_1(d_); 169 | let newexp = tpl_2(d_); 170 | let newfrac = tpl_3(d_); 171 | 172 | if ( newfrac[18] == 1 ) begin 173 | newfrac = newfrac>>1; 174 | newexp = newexp + 1; 175 | end 176 | normalizeQ1.enq(tuple3(newsign,newexp, newfrac)); 177 | endrule 178 | rule normalize1; 179 | normalizeQ1.deq; 180 | let d_ = normalizeQ1.first; 181 | let newsign = tpl_1(d_); 182 | let newexp = tpl_2(d_); 183 | let newfrac = tpl_3(d_); 184 | 185 | if ( newfrac[17:9] == 0 && newfrac != 0 ) begin 186 | newfrac = newfrac << 9; 187 | newexp = newexp - 9; 188 | end 189 | normalizeQ2.enq(tuple3(newsign,newexp, newfrac)); 190 | endrule 191 | rule normalize2; 192 | normalizeQ2.deq; 193 | let d_ = normalizeQ2.first; 194 | let newsign = tpl_1(d_); 195 | let newexp = tpl_2(d_); 196 | let newfrac = tpl_3(d_); 197 | if ( newfrac[17:14] == 0 && newfrac != 0 ) begin 198 | newfrac = newfrac << 5; 199 | newexp = newexp - 5; 200 | end 201 | normalizeQ3.enq(tuple3(newsign,newexp, newfrac)); 202 | endrule 203 | rule normalize3; 204 | normalizeQ3.deq; 205 | let d_ = normalizeQ3.first; 206 | let newsign = tpl_1(d_); 207 | let newexp = tpl_2(d_); 208 | let newfrac = tpl_3(d_); 209 | if ( newfrac[17:16] == 0 && newfrac != 0 ) begin 210 | newfrac = newfrac << 2; 211 | newexp = newexp - 2; 212 | end 213 | normalizeQ4.enq(tuple3(newsign,newexp, newfrac)); 214 | endrule 215 | rule normalize4; 216 | normalizeQ4.deq; 217 | let d_ = normalizeQ4.first; 218 | let newsign = tpl_1(d_); 219 | let newexp = tpl_2(d_); 220 | let newfrac = tpl_3(d_); 221 | if ( newfrac[17] == 0 && newfrac != 0 ) begin 222 | newfrac = newfrac << 1; 223 | newexp = newexp - 1; 224 | end 225 | Bit#(17) newfrace = truncate(newfrac); 226 | outQ.enq({newsign,newexp,newfrace,0}); 227 | endrule 228 | 229 | 230 | 231 | method Action put(Float a, Float b); 232 | Bit#(32) bina = pack(a); 233 | Bit#(32) binb = pack(b); 234 | inQ.enq(tuple2(bina,binb)); 235 | endmethod 236 | method ActionValue#(Float) get; 237 | outQ.deq; 238 | return unpack(outQ.first); 239 | endmethod 240 | endmodule 241 | 242 | endpackage: SimpleFloat 243 | -------------------------------------------------------------------------------- /src/Sdram.bsv: -------------------------------------------------------------------------------- 1 | package Sdram; 2 | 3 | import FIFO::*; 4 | import FIFOF::*; 5 | import Vector::*; 6 | 7 | 8 | // TODO clock either needs to be shifted, or commands need to be held for longer 9 | 10 | 11 | 12 | import "BDPI" function Action bdpiWriteSdram(Bit#(32) addr, Bit#(32) data); 13 | import "BDPI" function ActionValue#(Bit#(32)) bdpiReadSdram(Bit#(32) addr); 14 | 15 | function Bit#(2) extract_bank_address(Bit#(24) data); 16 | return data[10:9]; 17 | endfunction 18 | 19 | (* always_enabled, always_ready *) 20 | interface Ulx3sSdramPinsIfc; 21 | //output sdram_csn, // chip select 22 | (* prefix = "", result = "sdram_csn" *) 23 | method Bit#(1) sdram_csn(); 24 | interface Clock sdram_clk; 25 | 26 | //output sdram_rasn, // SDRAM RAS 27 | (* prefix = "", result = "sdram_rasn" *) 28 | method Bit#(1) sdram_rasn(); 29 | 30 | //output sdram_casn, // SDRAM CAS 31 | (* prefix = "", result = "sdram_casn" *) 32 | method Bit#(1) sdram_casn(); 33 | 34 | //output sdram_wen, // SDRAM write-enable 35 | (* prefix = "", result = "sdram_wen" *) 36 | method Bit#(1) sdram_wen(); 37 | 38 | //output [12:0] sdram_a, // SDRAM address bus 39 | (* prefix = "", result = "sdram_a" *) 40 | method Bit#(13) sdram_a(); 41 | 42 | //output [1:0] sdram_ba, // SDRAM bank-address 43 | (* prefix = "", result = "sdram_ba" *) 44 | method Bit#(2) sdram_ba(); 45 | 46 | //output [1:0] sdram_dqm, // byte select 47 | (* prefix = "", result = "sdram_dqm" *) 48 | method Bit#(2) sdram_dqm(); 49 | 50 | //inout [15:0] sdram_d, // data bus to/from SDRAM 51 | (* prefix = "XX_sdram_d_XX" *) 52 | interface Inout#(Bit#(16)) sdram_d; 53 | endinterface 54 | 55 | interface Ulx3sSdramUserIfc; 56 | method Action req(Bit#(24) addr, Bit#(16) data, Bool write); 57 | method ActionValue#(Bit#(16)) readResp; 58 | endinterface 59 | 60 | interface Ulx3sSdramIfc; 61 | `ifndef BSIM 62 | (* prefix="" *) 63 | interface Ulx3sSdramPinsIfc pins; 64 | `endif 65 | interface Ulx3sSdramUserIfc user; 66 | endinterface 67 | 68 | 69 | typedef enum {IDLE, REFRESH1, REFRESH2, CONFIG, RDWR, READREADY, WAIT} ControllerState deriving (Eq,Bits); 70 | Bit#(4) command_NOP = 4'b1000; 71 | Bit#(4) command_PRECHARGE = 4'b0001; 72 | Bit#(4) command_AUTOREFRESH = 4'b0100; 73 | Bit#(4) command_MODESET = 4'b0000; 74 | Bit#(4) command_READ = 4'b0110; 75 | Bit#(4) command_WRITE = 4'b0010; 76 | Bit#(4) command_ACTIVATE = 4'b0101; 77 | 78 | Bit#(4) delay_tRP = 3; 79 | Bit#(4) delay_tMRD = 2; 80 | Bit#(4) delay_tRCD = 3; 81 | Bit#(4) delay_tRC = 9; 82 | Bit#(4) delay_CL = 3; 83 | 84 | Bit#(13) addr_MODE = 13'b000_1_00_011_0_000; 85 | 86 | 87 | module mkUlx3sSdram#(Clock sdram_clk, Integer clock_mhz) (Ulx3sSdramIfc); 88 | Clock curclk <- exposeCurrentClock; 89 | 90 | FIFOF#(Bit#(16)) readRespQ <- mkFIFOF; 91 | FIFOF#(Tuple3#(Bit#(24),Bit#(16), Bool)) reqQ <- mkFIFOF; 92 | 93 | Integer init_cycles = 100 * clock_mhz; 94 | Integer rf_cycles = clock_mhz * 78 / 10; 95 | 96 | 97 | Reg#(Bit#(4)) command_out <- mkReg(command_NOP); 98 | // 11 during init, 00 after 99 | Reg#(Bit#(2)) dqm <- mkReg(2'b11); 100 | Reg#(Bit#(13)) addr_out <- mkReg(0); 101 | 102 | 103 | Reg#(ControllerState) state <- mkReg(IDLE); 104 | Reg#(ControllerState) state_next <- mkReg(IDLE); 105 | 106 | `ifndef BSIM 107 | Inout16Ifc xx_inout16_XX <- mkInout16(curclk); 108 | `endif 109 | 110 | Reg#(Bit#(16)) counter <- mkReg(0); 111 | // wait for chip to finish command 112 | Reg#(Bit#(4)) delay <- mkReg(0); 113 | 114 | Reg#(Bool) cur_cmd_isRead <- mkReg(False); 115 | Reg#(Bit#(24)) cur_cmd_address <- mkReg(0); 116 | Reg#(Bit#(16)) cur_cmd_wdata <- mkReg(0); 117 | 118 | Reg#(Bit#(4)) open_bank <- mkReg(0); 119 | Reg#(Vector#(4,Bit#(13))) open_rows <- mkReg(replicate(0)); 120 | 121 | // wait for SDRAM chip to init 122 | rule init( dqm != 0 && state == IDLE ); 123 | if ( counter + 1 >= fromInteger(init_cycles) ) begin 124 | counter <= 0; 125 | state <= REFRESH1; 126 | end else counter <= counter + 1; 127 | endrule 128 | 129 | rule controllerFSM ( state != IDLE || dqm == 0 ); 130 | // wait until we have to refresh again 131 | if ( state == REFRESH2 ) counter <= 0; 132 | else counter <= counter + 1; 133 | 134 | case (state) 135 | IDLE: begin 136 | if ( counter >= fromInteger(rf_cycles) ) state <= REFRESH1; 137 | else if ( reqQ.notEmpty ) begin 138 | reqQ.deq; 139 | let r = reqQ.first; 140 | let raddr = tpl_1(r); 141 | let wdata = tpl_2(r); 142 | cur_cmd_address <= raddr; 143 | cur_cmd_wdata <= wdata; 144 | cur_cmd_isRead <= !tpl_3(r); 145 | state <= RDWR; 146 | end 147 | end 148 | RDWR: begin 149 | Bit#(2) bank = extract_bank_address(cur_cmd_address); 150 | Bit#(13) row = cur_cmd_address[23:11]; 151 | Bit#(9) col = cur_cmd_address[8:0]; 152 | //$write( "Sdram rdwr" ); 153 | 154 | if ( open_bank[bank] == 0 ) begin 155 | command_out <= command_ACTIVATE; 156 | addr_out <= row; 157 | open_bank[bank] <= 1; 158 | open_rows[bank] <= row; 159 | delay <= delay_tRCD -2; 160 | state_next <= RDWR; 161 | state <= WAIT; 162 | //TODO FIXME: mark prev bank closed 163 | end else if ( open_rows[bank] != row ) begin 164 | command_out <= command_PRECHARGE; 165 | addr_out[10] <= 0; 166 | open_bank[bank] <= 0; // TODO I don't think it has to be closed? 167 | delay <= delay_tRP-2; 168 | state_next <= RDWR; 169 | state <= WAIT; 170 | end else begin 171 | if ( cur_cmd_isRead ) command_out <= command_READ; 172 | else command_out <= command_WRITE; 173 | addr_out <= {0,col}; 174 | delay <= delay_CL-1; 175 | state <= WAIT; 176 | if ( cur_cmd_isRead ) begin 177 | state_next <= READREADY; 178 | //$write( "Read\n" ); 179 | end else begin 180 | //$write( "Write\n" ); 181 | state_next <= IDLE; 182 | `ifndef BSIM 183 | xx_inout16_XX.write(cur_cmd_wdata); 184 | `else 185 | bdpiWriteSdram(zeroExtend(cur_cmd_address), zeroExtend(cur_cmd_wdata)); 186 | `endif 187 | end 188 | end 189 | end 190 | REFRESH1: begin 191 | command_out <= command_PRECHARGE; 192 | delay <= delay_tRP-2; 193 | addr_out[10] <= 1; 194 | open_bank <= 0; 195 | if ( dqm != 0 ) state_next <= CONFIG; 196 | else state_next <= REFRESH2; 197 | state <= WAIT; 198 | 199 | //$write( "Sdram refresh1\n" ); 200 | //$fflush(); 201 | end 202 | REFRESH2: begin 203 | command_out <= command_AUTOREFRESH; 204 | dqm <= 0; 205 | delay <= delay_tRC-2; 206 | 207 | if ( dqm != 0 ) state_next <= REFRESH2; // repeat again 208 | else state_next <= IDLE; 209 | state <= WAIT; 210 | //$write( "Sdram refresh2\n" ); 211 | end 212 | CONFIG: begin 213 | command_out <= command_MODESET; 214 | addr_out <= addr_MODE; 215 | delay <= delay_tMRD-2; 216 | state_next <= REFRESH2; 217 | state <= WAIT; 218 | //$write( "Sdram config\n" ); 219 | end 220 | READREADY: begin 221 | if ( readRespQ.notFull ) begin 222 | `ifndef BSIM 223 | let d = xx_inout16_XX.read; 224 | readRespQ.enq(d); 225 | `else 226 | let d <- bdpiReadSdram(zeroExtend(cur_cmd_address)); 227 | readRespQ.enq(truncate(d)); 228 | `endif 229 | //$write( "Read data %x\n", d ); 230 | state <= IDLE; 231 | end 232 | end 233 | WAIT: begin 234 | if ( delay != 0 ) delay <= delay - 1; 235 | else state <= state_next; 236 | end 237 | endcase 238 | endrule 239 | 240 | 241 | 242 | 243 | 244 | 245 | `ifndef BSIM 246 | interface Ulx3sSdramPinsIfc pins; 247 | interface sdram_clk = sdram_clk; 248 | method Bit#(1) sdram_csn(); 249 | return command_out[3]; 250 | endmethod 251 | method Bit#(1) sdram_wen(); 252 | return command_out[2]; 253 | endmethod 254 | method Bit#(1) sdram_rasn(); 255 | return command_out[1]; 256 | endmethod 257 | method Bit#(1) sdram_casn(); 258 | return command_out[0]; 259 | endmethod 260 | method Bit#(2) sdram_dqm(); 261 | return dqm; 262 | endmethod 263 | method Bit#(13) sdram_a(); 264 | return addr_out; 265 | endmethod 266 | method Bit#(2) sdram_ba(); 267 | Bit#(2) bank = extract_bank_address(cur_cmd_address); 268 | 269 | if ( state == CONFIG ) return 0; 270 | else return bank; 271 | endmethod 272 | interface sdram_d = xx_inout16_XX.inout_pins; 273 | endinterface 274 | `endif 275 | 276 | interface Ulx3sSdramUserIfc user; 277 | method Action req(Bit#(24) addr, Bit#(16) data, Bool write); 278 | reqQ.enq(tuple3(addr,data,write)); 279 | endmethod 280 | method ActionValue#(Bit#(16)) readResp; 281 | readRespQ.deq; 282 | return readRespQ.first; 283 | endmethod 284 | endinterface 285 | endmodule 286 | 287 | interface Inout16Ifc; 288 | interface Inout#(Bit#(16)) inout_pins; 289 | 290 | method Action write(Bit#(16) data); 291 | method Bit#(16) read; 292 | endinterface 293 | 294 | import "BVI" inout16 = 295 | module mkInout16#(Clock curclk) (Inout16Ifc); 296 | default_clock no_clock; 297 | default_reset no_reset; 298 | 299 | input_clock (clk) = curclk; 300 | 301 | ifc_inout inout_pins(inout_pins); 302 | 303 | method write(write_data) enable(write_req) clocked_by(curclk); 304 | method read_data read; 305 | 306 | schedule ( 307 | write, read 308 | ) CF ( 309 | write, read 310 | ); 311 | endmodule 312 | 313 | endpackage: Sdram 314 | -------------------------------------------------------------------------------- /projects/rv32i/processor/Decode.bsv: -------------------------------------------------------------------------------- 1 | import Defines::*; 2 | 3 | typedef enum { OP, OPIMM, BRANCH, LUI, JAL, JALR, LOAD, STORE, AUIPC, Unsupported} IType deriving (Bits, Eq, FShow); 4 | typedef enum {Eq, Neq, Lt, Ltu, Ge, Geu, AT, NT} BrFunc deriving (Bits, Eq, FShow); 5 | typedef enum {Add, Sub, And, Or, Xor, Slt, Sltu, Sll, Srl, Sra, Mul} AluFunc deriving (Bits, Eq, FShow); 6 | typedef enum {ImmI, ImmS, ImmB, ImmU, ImmJ, NoImm} ImmType deriving (Bits, Eq, FShow); 7 | 8 | // Opcode 9 | Bit#(7) opOpImm = 7'b0010011; 10 | Bit#(7) opOp = 7'b0110011; 11 | Bit#(7) opLui = 7'b0110111; 12 | Bit#(7) opJal = 7'b1101111; 13 | Bit#(7) opJalr = 7'b1100111; 14 | Bit#(7) opBranch = 7'b1100011; 15 | Bit#(7) opLoad = 7'b0000011; 16 | Bit#(7) opStore = 7'b0100011; 17 | Bit#(7) opAuipc = 7'b0010111; 18 | Bit#(7) opSystem = 7'b1110011; 19 | 20 | // funct3 - ALU 21 | Bit#(3) fnADD = 3'b000; 22 | Bit#(3) fnSLL = 3'b001; 23 | Bit#(3) fnSLT = 3'b010; 24 | Bit#(3) fnSLTU = 3'b011; 25 | Bit#(3) fnXOR = 3'b100; 26 | Bit#(3) fnSR = 3'b101; 27 | Bit#(3) fnOR = 3'b110; 28 | Bit#(3) fnAND = 3'b111; 29 | // funct3 - Branch 30 | Bit#(3) fnBEQ = 3'b000; 31 | Bit#(3) fnBNE = 3'b001; 32 | Bit#(3) fnBLT = 3'b100; 33 | Bit#(3) fnBGE = 3'b101; 34 | Bit#(3) fnBLTU = 3'b110; 35 | Bit#(3) fnBGEU = 3'b111; 36 | // funct3 - Load 37 | Bit#(3) fnLW = 3'b010; 38 | Bit#(3) fnLB = 3'b000; 39 | Bit#(3) fnLH = 3'b001; 40 | Bit#(3) fnLBU = 3'b100; 41 | Bit#(3) fnLHU = 3'b101; 42 | // funct3 - Store 43 | Bit#(3) fnSW = 3'b010; 44 | Bit#(3) fnSB = 3'b000; 45 | Bit#(3) fnSH = 3'b001; 46 | // funct3 - Multiply 47 | Bit#(3) fnMUL = 3'b000; 48 | // funct3 - CSR 49 | Bit#(3) fnCSRRS = 3'b010; 50 | 51 | 52 | typedef struct { 53 | IType iType; 54 | AluFunc aluFunc; 55 | BrFunc brFunc; 56 | Bool writeDst; 57 | RIndx dst; 58 | RIndx src1; 59 | RIndx src2; 60 | Word imm; 61 | SizeType size; 62 | Bool extendSigned; 63 | } DecodedInst deriving (Bits, Eq, FShow); 64 | 65 | function DecodedInst decode(Bit#(32) inst); 66 | let opcode = inst[6:0]; 67 | let funct3 = inst[14:12]; 68 | let funct7 = inst[31:25]; 69 | let dst = inst[11:7]; 70 | let src1 = inst[19:15]; 71 | let src2 = inst[24:20]; 72 | let csr = inst[31:20]; 73 | 74 | Word immI = signExtend(inst[31:20]); 75 | Word immS = signExtend({ inst[31:25], inst[11:7] }); 76 | Word immB = signExtend({ inst[31], inst[7], inst[30:25], inst[11:8], 1'b0}); 77 | Word immU = signExtend({ inst[31:12], 12'b0 }); 78 | Word immJ = signExtend({ inst[31], inst[19:12], inst[20], inst[30:21], 1'b0}); 79 | 80 | DecodedInst dInst = ?; 81 | dInst.iType = Unsupported; 82 | dInst.dst = 0; 83 | dInst.writeDst = False; 84 | dInst.src1 = 0; 85 | dInst.src2 = 0; 86 | case(opcode) 87 | opOp: begin 88 | if (funct7 == 7'b0000000) begin 89 | case (funct3) 90 | fnADD: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Add, iType: OP, size: ?, extendSigned: ? }; 91 | fnSLT: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Slt, iType: OP, size: ?, extendSigned: ? }; 92 | fnSLTU: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Sltu, iType: OP, size: ?, extendSigned: ? }; 93 | fnXOR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Xor, iType: OP, size: ?, extendSigned: ? }; 94 | fnOR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Or, iType: OP, size: ?, extendSigned: ? }; 95 | fnAND: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: And, iType: OP, size: ?, extendSigned: ? }; 96 | fnSLL: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Sll, iType: OP, size: ?, extendSigned: ? }; 97 | fnSR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Srl, iType: OP, size: ?, extendSigned: ? }; 98 | endcase 99 | end else if (funct7 == 7'b0100000) begin 100 | case (funct3) 101 | fnADD: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Sub, iType: OP, size: ?, extendSigned: ? }; 102 | fnSR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: src2, imm: ?, brFunc: ?, aluFunc: Sra, iType: OP, size: ?, extendSigned: ? }; 103 | endcase 104 | end 105 | else if (funct7 == 7'b0000001) begin 106 | //case (funct3) 107 | // case fnMUL: 108 | //endcase 109 | end 110 | end 111 | opOpImm: begin 112 | case (funct3) 113 | fnADD: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Add, iType: OPIMM, size: ?, extendSigned: ? }; 114 | fnSLT: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Slt, iType: OPIMM, size: ?, extendSigned: ? }; 115 | fnSLTU: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Sltu, iType: OPIMM, size: ?, extendSigned: ? }; 116 | fnXOR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Xor, iType: OPIMM, size: ?, extendSigned: ? }; 117 | fnOR: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Or, iType: OPIMM, size: ?, extendSigned: ? }; 118 | fnAND: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: And, iType: OPIMM, size: ?, extendSigned: ? }; 119 | fnSLL: begin 120 | if (funct7 == 7'b0000000) begin 121 | dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Sll, iType: OPIMM, size: ?, extendSigned: ? }; 122 | end 123 | end 124 | fnSR: begin 125 | if (funct7 == 7'b0000000) begin 126 | dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Srl, iType: OPIMM, size: ?, extendSigned: ? }; 127 | end else if (funct7 == 7'b0100000) begin 128 | dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: Sra, iType: OPIMM, size: ?, extendSigned: ? }; 129 | end 130 | end 131 | endcase 132 | end 133 | opBranch: begin 134 | case(funct3) 135 | fnBEQ: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Eq, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 136 | fnBNE: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Neq, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 137 | fnBLT: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Lt, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 138 | fnBGE: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Ge, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 139 | fnBLTU: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Ltu, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 140 | fnBGEU: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immB, brFunc: Geu, aluFunc: ?, iType: BRANCH, size: ?, extendSigned: ? }; 141 | endcase 142 | end 143 | opLui: dInst = DecodedInst { dst: dst, writeDst: True, src1: 0, src2: 0, imm: immU, brFunc: ?, aluFunc: ?, iType: LUI, size: ?, extendSigned: ? }; 144 | opJal: dInst = DecodedInst { dst: dst, writeDst: True, src1: 0, src2: 0, imm: immJ, brFunc: ?, aluFunc: ?, iType: JAL, size: ?, extendSigned: ? }; 145 | opJalr: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: ?, iType: JALR, size: ?, extendSigned: ? }; 146 | opLoad: begin 147 | case (funct3) 148 | fnLW: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: ?, iType: LOAD, size:3, extendSigned: ? }; 149 | fnLH: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: ?, iType: LOAD, size:1, extendSigned: True }; 150 | fnLB: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2: 0, imm: immI, brFunc: ?, aluFunc: ?, iType: LOAD, size:0, extendSigned: True }; 151 | fnLHU: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2:0, imm: immI, brFunc: ?, aluFunc: ?, iType: LOAD, size:1, extendSigned: False }; 152 | fnLBU: dInst = DecodedInst { dst: dst, writeDst: True, src1: src1, src2:0, imm: immI, brFunc: ?, aluFunc: ?, iType: LOAD, size:0, extendSigned: False }; 153 | endcase 154 | end 155 | opStore: begin 156 | case (funct3) 157 | fnSW: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immS, brFunc: ?, aluFunc: ?, iType: STORE, size: 3, extendSigned: ? }; 158 | fnSH: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immS, brFunc: ?, aluFunc: ?, iType: STORE, size: 1, extendSigned: ? }; 159 | fnSB: dInst = DecodedInst { dst: 0, writeDst: False, src1: src1, src2: src2, imm: immS, brFunc: ?, aluFunc: ?, iType: STORE, size: 0, extendSigned: ? }; 160 | endcase 161 | end 162 | 163 | opAuipc: dInst = DecodedInst { dst: dst, writeDst: True, src1: 0, src2: 0, imm: immU, brFunc: ?, aluFunc: ?, iType: AUIPC, size:?, extendSigned: ? }; 164 | endcase 165 | return dInst; 166 | endfunction 167 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/ZfpDecompress.bsv: -------------------------------------------------------------------------------- 1 | import Connectable::*; 2 | import FIFO::*; 3 | import FIFOF::*; 4 | import Vector::*; 5 | import SimpleFloat::*; 6 | import FloatingPoint::*; 7 | 8 | typedef 33 ZfpComprTypeSz; // e = 9, block = bit_budget(5)*4 = 20, flag = (0 or 1)*4 = 4 9 | typedef TSub#(ZfpComprTypeSz,9) CompressedBitsTotal; 10 | typedef TDiv#(CompressedBitsTotal,4) CompressedBitsEach; 11 | typedef TDiv#(TSub#(CompressedBitsTotal,4),4) BitBudget; 12 | 13 | interface ZfpDecompressIfc; 14 | method Action put(Bit#(ZfpComprTypeSz) data); 15 | method ActionValue#(Bit#(32)) get; 16 | endinterface 17 | 18 | function Bit#(9) get_e(Bit#(ZfpComprTypeSz) buffer); 19 | Bit#(9) e = 0; 20 | e = truncate(buffer); 21 | return e; 22 | endfunction 23 | function Bit#(CompressedBitsTotal) get_d(Bit#(ZfpComprTypeSz) buffer); 24 | Bit#(CompressedBitsTotal) d = 0; 25 | d = truncateLSB(buffer); 26 | return d; 27 | endfunction 28 | function Bit#(32) uint_to_int(Bit#(32) t); 29 | Bit#(32) d = 32'haaaaaaaa; 30 | t = t ^ d; 31 | t = t - d; 32 | return t; 33 | endfunction 34 | function Bit#(32)intShiftR(Bit#(32) t); 35 | Bit#(1) s = t[31]; 36 | t = t >> 1; 37 | t[31] = s; 38 | return t; 39 | endfunction 40 | 41 | (* synthesize *) 42 | module mkZfpDecompress (ZfpDecompressIfc); 43 | 44 | FIFO#(Bit#(ZfpComprTypeSz)) inputQ <- mkFIFO; 45 | FIFO#(Bit#(9)) eQ <- mkFIFO; 46 | 47 | FIFO#(Vector#(4,Bit#(32))) udataQ <- mkFIFO; 48 | FIFO#(Vector#(4,Bit#(32))) toUnblock_1 <- mkFIFO; 49 | FIFO#(Vector#(4,Bit#(32))) toUnblock_2 <- mkFIFO; 50 | FIFO#(Vector#(4,Bit#(32))) toUnblock_3 <- mkFIFO; 51 | FIFO#(Vector#(4,Bit#(32))) toUnblock_4 <- mkFIFO; 52 | FIFO#(Vector#(4,Bit#(32))) toConvertNega <- mkFIFO; 53 | 54 | FIFO#(Bit#(1)) signQ <- mkSizedFIFO(20); 55 | FIFO#(Bit#(32)) toGetResult <- mkFIFO; 56 | FIFO#(Bit#(32)) outputQ <- mkFIFO; 57 | 58 | rule decompFirst; 59 | inputQ.deq; 60 | Bit#(9) e = get_e(inputQ.first); 61 | Bit#(CompressedBitsTotal) comprTotal = get_d(inputQ.first); 62 | 63 | Bit#(TMul#(CompressedBitsEach,3)) toSecondStep = truncateLSB(comprTotal); 64 | Bit#(TMul#(CompressedBitsEach,2)) toThirdStep = truncateLSB(comprTotal); 65 | Bit#(CompressedBitsEach) toFourthStep = truncateLSB(comprTotal); 66 | 67 | Bit#(CompressedBitsEach) comprFirst = truncate(comprTotal); 68 | Bit#(CompressedBitsEach) comprSecond = truncate(toSecondStep); 69 | Bit#(CompressedBitsEach) comprThird = truncate(toThirdStep); 70 | Bit#(CompressedBitsEach) comprFourth = truncate(toFourthStep); 71 | 72 | Bit#(1) flagFirst = truncate(comprFirst); 73 | Bit#(1) flagSecond = truncate(comprSecond); 74 | Bit#(1) flagThird = truncate(comprThird); 75 | Bit#(1) flagFourth = truncate(comprFourth); 76 | 77 | Bit#(BitBudget) comprFirstDataTmp = truncateLSB(comprFirst); 78 | Bit#(BitBudget) comprSecondDataTmp = truncateLSB(comprSecond); 79 | Bit#(BitBudget) comprThirdDataTmp = truncateLSB(comprThird); 80 | Bit#(BitBudget) comprFourthDataTmp = truncateLSB(comprFourth); 81 | 82 | Bit#(32) comprFirstData = zeroExtend(comprFirstDataTmp); 83 | Bit#(32) comprSecondData = zeroExtend(comprSecondDataTmp); 84 | Bit#(32) comprThirdData = zeroExtend(comprThirdDataTmp); 85 | Bit#(32) comprFourthData = zeroExtend(comprFourthDataTmp); 86 | 87 | Bit#(32) udataFirst = 0; 88 | Bit#(32) udataSecond = 0; 89 | Bit#(32) udataThird = 0; 90 | Bit#(32) udataFourth = 0; 91 | 92 | Vector#(4,Bit#(32)) udata = replicate(0); 93 | 94 | if ( flagFirst == 0 ) begin 95 | udataFirst = comprFirstData << (32 - valueof(BitBudget) - 4); 96 | udata[0] = udataFirst; 97 | end else begin 98 | udataFirst = comprFirstData << (32 - valueof(BitBudget)); 99 | udata[0] = udataFirst; 100 | end 101 | if ( flagSecond == 0 ) begin 102 | udataSecond = comprSecondData << (32 - valueof(BitBudget) - 4); 103 | udata[1] = udataSecond; 104 | end else begin 105 | udataSecond = comprSecondData << (32 - valueof(BitBudget)); 106 | udata[1] = udataSecond; 107 | end 108 | if ( flagThird == 0 ) begin 109 | udataThird = comprThirdData << (32 - valueof(BitBudget) - 4); 110 | udata[2] = udataThird; 111 | end else begin 112 | udataThird = comprThirdData << (32 - valueof(BitBudget)); 113 | udata[2] = udataThird; 114 | end 115 | if ( flagFourth == 0 ) begin 116 | udataFourth = comprFourthData << (32 - valueof(BitBudget) - 4); 117 | udata[3] = udataFourth; 118 | end else begin 119 | udataFourth = comprFourthData << (32 - valueof(BitBudget)); 120 | udata[3] = udataFourth; 121 | end 122 | 123 | eQ.enq(e); 124 | udataQ.enq(udata); 125 | endrule 126 | 127 | rule convert; 128 | udataQ.deq; 129 | Vector#(4,Bit#(32)) d = udataQ.first; 130 | for (Bit#(3) i = 0; i < 4; i = i + 1) begin 131 | d[i] = uint_to_int(d[i]); 132 | end 133 | toUnblock_1.enq(d); 134 | endrule 135 | 136 | rule unblock_1; 137 | toUnblock_1.deq; 138 | Vector#(4,Bit#(32)) d = toUnblock_1.first; 139 | d[1] = d[1] + intShiftR(d[3]); d[3] = d[3] - intShiftR(d[1]); 140 | toUnblock_2.enq(d); 141 | endrule 142 | 143 | rule unblock_2; 144 | toUnblock_2.deq; 145 | Vector#(4,Bit#(32)) d = toUnblock_2.first; 146 | d[1] = d[1] + d[3]; d[3]= d[3] << 1; d[3] = d[3] - d[1]; 147 | d[2] = d[2] + d[0]; d[0]= d[0] << 1; d[0] = d[0] - d[2]; 148 | toUnblock_3.enq(d); 149 | endrule 150 | 151 | rule unblock_3; 152 | toUnblock_3.deq; 153 | Vector#(4,Bit#(32)) d = toUnblock_3.first; 154 | d[1] = d[1] + d[2]; d[2]= d[2] << 1; d[2] = d[2] - d[1]; 155 | toUnblock_4.enq(d); 156 | endrule 157 | 158 | rule unblock_4; 159 | toUnblock_4.deq; 160 | Vector#(4,Bit#(32)) d = toUnblock_4.first; 161 | d[3] = d[3] + d[0]; d[0]= d[0] << 1; d[0] = d[0] - d[3]; 162 | toConvertNega.enq(d); 163 | endrule 164 | 165 | Reg#(Bit#(2)) convertNegaCycle <- mkReg(0); 166 | Vector#(4,Reg#(Bit#(32))) convertNegaBuffer <- replicateM(mkReg(0)); 167 | rule convertNega; 168 | if ( convertNegaCycle == 0 ) begin 169 | toConvertNega.deq; 170 | Vector#(4,Bit#(32)) d = toConvertNega.first; 171 | Bit#(1) sign = 0; 172 | if ( d[0][31] == 1 ) begin 173 | d[0] = -d[0]; 174 | sign = 1; 175 | end else begin 176 | sign = 0; 177 | end 178 | for ( Bit#(3) i = 0; i < 4; i = i+1 ) begin 179 | convertNegaBuffer[i] <= d[i]; 180 | end 181 | convertNegaCycle <= convertNegaCycle + 1; 182 | toGetResult.enq(d[0]); 183 | signQ.enq(sign); 184 | end else begin 185 | let i = convertNegaCycle; 186 | let d = convertNegaBuffer[i]; 187 | Bit#(1) sign = 0; 188 | if (d[31] == 1) begin 189 | d = -d; 190 | sign = 1; 191 | end else begin 192 | sign = 0; 193 | end 194 | if ( i == 3 ) convertNegaCycle <= 0; 195 | else convertNegaCycle <= convertNegaCycle + 1; 196 | toGetResult.enq(d); 197 | signQ.enq(sign); 198 | end 199 | endrule 200 | 201 | Reg#(Bit#(9)) eBuffer <- mkReg(0); 202 | Reg#(Bit#(2)) getResultCycle <- mkReg(0); 203 | rule getResult; 204 | toGetResult.deq; 205 | signQ.deq; 206 | if ( getResultCycle == 0 ) begin 207 | eQ.deq; 208 | let e = eQ.first; 209 | eBuffer <= e; 210 | Bit#(9) expMax = e - 127; 211 | //$write("%d\n", expMax); 212 | Bit#(32) d = toGetResult.first; 213 | Bit#(32) decomp = 0; 214 | Bit#(32) tmp = 0; 215 | Bit#(1) sign = signQ.first; 216 | 217 | if ( expMax == fromInteger(385) ) begin 218 | //$write("came in to zero case\n"); 219 | tmp = d >> (127 + (32 - 2)); 220 | decomp[30:0] = tmp[30:0]; 221 | decomp[31] = sign; 222 | //Float value = unpack(decomp[i]); 223 | //$write("%d\n", value); 224 | //$write("sign bit is %d\n", sign[i]); 225 | end else begin 226 | if ( expMax < 30 ) begin 227 | //$write("came in to normal case_1\n"); 228 | tmp = d >> ((32 - 2) - expMax); 229 | decomp[30:0] = tmp[30:0]; 230 | decomp[31] = sign; 231 | //Float value = unpack(decomp[i]); 232 | //$write("%d\n", value); 233 | //$write("sign bit is %d\n", sign[i]); 234 | end else begin 235 | //$write("came in to normal case_2\n"); 236 | tmp = d << (expMax - (32 - 2)); 237 | decomp[30:0] = tmp[30:0]; 238 | decomp[31] = sign; 239 | //Float value = unpack(decomp[i]); 240 | //$write("%d\n", value); 241 | end 242 | end 243 | getResultCycle <= getResultCycle + 1; 244 | outputQ.enq(decomp); 245 | end else begin 246 | let e = eBuffer; 247 | Bit#(9) expMax = e - 127; 248 | //$write("%d\n", expMax); 249 | Bit#(32) d = toGetResult.first; 250 | Bit#(32) decomp = 0; 251 | Bit#(32) tmp = 0; 252 | Bit#(1) sign = signQ.first; 253 | 254 | if ( expMax == fromInteger(385) ) begin 255 | //$write("came in to zero case\n"); 256 | tmp = d >> (127 + (32 - 2)); 257 | decomp[30:0] = tmp[30:0]; 258 | decomp[31] = sign; 259 | //Float value = unpack(decomp[i]); 260 | //$write("%d\n", value); 261 | //$write("sign bit is %d\n", sign[i]); 262 | end else begin 263 | if ( expMax < 30 ) begin 264 | //$write("came in to normal case_1\n"); 265 | tmp = d >> ((32 - 2) - expMax); 266 | decomp[30:0] = tmp[30:0]; 267 | decomp[31] = sign; 268 | //Float value = unpack(decomp[i]); 269 | //$write("%d\n", value); 270 | //$write("sign bit is %d\n", sign[i]); 271 | end else begin 272 | //$write("came in to normal case_2\n"); 273 | tmp = d << (expMax - (32 - 2)); 274 | decomp[30:0] = tmp[30:0]; 275 | decomp[31] = sign; 276 | //Float value = unpack(decomp[i]); 277 | //$write("%d\n", value); 278 | end 279 | end 280 | if ( getResultCycle == 3 ) getResultCycle <= 0; 281 | else getResultCycle <= getResultCycle + 1; 282 | outputQ.enq(decomp); 283 | end 284 | endrule 285 | 286 | method Action put(Bit#(ZfpComprTypeSz) data); 287 | inputQ.enq(data); 288 | endmethod 289 | method ActionValue#(Bit#(32)) get; 290 | outputQ.deq; 291 | return outputQ.first; 292 | endmethod 293 | endmodule 294 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/ZfpDecompress.bsv: -------------------------------------------------------------------------------- 1 | import Connectable::*; 2 | import FIFO::*; 3 | import FIFOF::*; 4 | import Vector::*; 5 | import SimpleFloat::*; 6 | import FloatingPoint::*; 7 | 8 | typedef 33 ZfpComprTypeSz; // e = 9, block = bit_budget(5)*4 = 20, flag = (0 or 1)*4 = 4 9 | typedef TSub#(ZfpComprTypeSz,9) CompressedBitsTotal; 10 | typedef TDiv#(CompressedBitsTotal,4) CompressedBitsEach; 11 | typedef TDiv#(TSub#(CompressedBitsTotal,4),4) BitBudget; 12 | 13 | interface ZfpDecompressIfc; 14 | method Action put(Bit#(ZfpComprTypeSz) data); 15 | method ActionValue#(Bit#(32)) get; 16 | endinterface 17 | 18 | function Bit#(9) get_e(Bit#(ZfpComprTypeSz) buffer); 19 | Bit#(9) e = 0; 20 | e = truncate(buffer); 21 | return e; 22 | endfunction 23 | function Bit#(CompressedBitsTotal) get_d(Bit#(ZfpComprTypeSz) buffer); 24 | Bit#(CompressedBitsTotal) d = 0; 25 | d = truncateLSB(buffer); 26 | return d; 27 | endfunction 28 | function Bit#(32) uint_to_int(Bit#(32) t); 29 | Bit#(32) d = 32'haaaaaaaa; 30 | t = t ^ d; 31 | t = t - d; 32 | return t; 33 | endfunction 34 | function Bit#(32)intShiftR(Bit#(32) t); 35 | Bit#(1) s = t[31]; 36 | t = t >> 1; 37 | t[31] = s; 38 | return t; 39 | endfunction 40 | 41 | (* synthesize *) 42 | module mkZfpDecompress (ZfpDecompressIfc); 43 | 44 | FIFO#(Bit#(ZfpComprTypeSz)) inputQ <- mkFIFO; 45 | FIFO#(Bit#(9)) eQ <- mkFIFO; 46 | 47 | FIFO#(Vector#(4,Bit#(32))) udataQ <- mkFIFO; 48 | FIFO#(Vector#(4,Bit#(32))) toUnblock_1 <- mkFIFO; 49 | FIFO#(Vector#(4,Bit#(32))) toUnblock_2 <- mkFIFO; 50 | FIFO#(Vector#(4,Bit#(32))) toUnblock_3 <- mkFIFO; 51 | FIFO#(Vector#(4,Bit#(32))) toUnblock_4 <- mkFIFO; 52 | FIFO#(Vector#(4,Bit#(32))) toConvertNega <- mkFIFO; 53 | 54 | FIFO#(Bit#(1)) signQ <- mkSizedFIFO(20); 55 | FIFO#(Bit#(32)) toGetResult <- mkFIFO; 56 | FIFO#(Bit#(32)) outputQ <- mkFIFO; 57 | 58 | rule decompFirst; 59 | inputQ.deq; 60 | Bit#(9) e = get_e(inputQ.first); 61 | Bit#(CompressedBitsTotal) comprTotal = get_d(inputQ.first); 62 | 63 | Bit#(TMul#(CompressedBitsEach,3)) toSecondStep = truncateLSB(comprTotal); 64 | Bit#(TMul#(CompressedBitsEach,2)) toThirdStep = truncateLSB(comprTotal); 65 | Bit#(CompressedBitsEach) toFourthStep = truncateLSB(comprTotal); 66 | 67 | Bit#(CompressedBitsEach) comprFirst = truncate(comprTotal); 68 | Bit#(CompressedBitsEach) comprSecond = truncate(toSecondStep); 69 | Bit#(CompressedBitsEach) comprThird = truncate(toThirdStep); 70 | Bit#(CompressedBitsEach) comprFourth = truncate(toFourthStep); 71 | 72 | Bit#(1) flagFirst = truncate(comprFirst); 73 | Bit#(1) flagSecond = truncate(comprSecond); 74 | Bit#(1) flagThird = truncate(comprThird); 75 | Bit#(1) flagFourth = truncate(comprFourth); 76 | 77 | Bit#(BitBudget) comprFirstDataTmp = truncateLSB(comprFirst); 78 | Bit#(BitBudget) comprSecondDataTmp = truncateLSB(comprSecond); 79 | Bit#(BitBudget) comprThirdDataTmp = truncateLSB(comprThird); 80 | Bit#(BitBudget) comprFourthDataTmp = truncateLSB(comprFourth); 81 | 82 | Bit#(32) comprFirstData = zeroExtend(comprFirstDataTmp); 83 | Bit#(32) comprSecondData = zeroExtend(comprSecondDataTmp); 84 | Bit#(32) comprThirdData = zeroExtend(comprThirdDataTmp); 85 | Bit#(32) comprFourthData = zeroExtend(comprFourthDataTmp); 86 | 87 | Bit#(32) udataFirst = 0; 88 | Bit#(32) udataSecond = 0; 89 | Bit#(32) udataThird = 0; 90 | Bit#(32) udataFourth = 0; 91 | 92 | Vector#(4,Bit#(32)) udata = replicate(0); 93 | 94 | if ( flagFirst == 0 ) begin 95 | udataFirst = comprFirstData << (32 - valueof(BitBudget) - 4); 96 | udata[0] = udataFirst; 97 | end else begin 98 | udataFirst = comprFirstData << (32 - valueof(BitBudget)); 99 | udata[0] = udataFirst; 100 | end 101 | if ( flagSecond == 0 ) begin 102 | udataSecond = comprSecondData << (32 - valueof(BitBudget) - 4); 103 | udata[1] = udataSecond; 104 | end else begin 105 | udataSecond = comprSecondData << (32 - valueof(BitBudget)); 106 | udata[1] = udataSecond; 107 | end 108 | if ( flagThird == 0 ) begin 109 | udataThird = comprThirdData << (32 - valueof(BitBudget) - 4); 110 | udata[2] = udataThird; 111 | end else begin 112 | udataThird = comprThirdData << (32 - valueof(BitBudget)); 113 | udata[2] = udataThird; 114 | end 115 | if ( flagFourth == 0 ) begin 116 | udataFourth = comprFourthData << (32 - valueof(BitBudget) - 4); 117 | udata[3] = udataFourth; 118 | end else begin 119 | udataFourth = comprFourthData << (32 - valueof(BitBudget)); 120 | udata[3] = udataFourth; 121 | end 122 | 123 | eQ.enq(e); 124 | udataQ.enq(udata); 125 | endrule 126 | 127 | rule convert; 128 | udataQ.deq; 129 | Vector#(4,Bit#(32)) d = udataQ.first; 130 | for (Bit#(3) i = 0; i < 4; i = i + 1) begin 131 | d[i] = uint_to_int(d[i]); 132 | end 133 | toUnblock_1.enq(d); 134 | endrule 135 | 136 | rule unblock_1; 137 | toUnblock_1.deq; 138 | Vector#(4,Bit#(32)) d = toUnblock_1.first; 139 | d[1] = d[1] + intShiftR(d[3]); d[3] = d[3] - intShiftR(d[1]); 140 | toUnblock_2.enq(d); 141 | endrule 142 | 143 | rule unblock_2; 144 | toUnblock_2.deq; 145 | Vector#(4,Bit#(32)) d = toUnblock_2.first; 146 | d[1] = d[1] + d[3]; d[3]= d[3] << 1; d[3] = d[3] - d[1]; 147 | d[2] = d[2] + d[0]; d[0]= d[0] << 1; d[0] = d[0] - d[2]; 148 | toUnblock_3.enq(d); 149 | endrule 150 | 151 | rule unblock_3; 152 | toUnblock_3.deq; 153 | Vector#(4,Bit#(32)) d = toUnblock_3.first; 154 | d[1] = d[1] + d[2]; d[2]= d[2] << 1; d[2] = d[2] - d[1]; 155 | toUnblock_4.enq(d); 156 | endrule 157 | 158 | rule unblock_4; 159 | toUnblock_4.deq; 160 | Vector#(4,Bit#(32)) d = toUnblock_4.first; 161 | d[3] = d[3] + d[0]; d[0]= d[0] << 1; d[0] = d[0] - d[3]; 162 | toConvertNega.enq(d); 163 | endrule 164 | 165 | Reg#(Bit#(2)) convertNegaCycle <- mkReg(0); 166 | Vector#(4,Reg#(Bit#(32))) convertNegaBuffer <- replicateM(mkReg(0)); 167 | rule convertNega; 168 | if ( convertNegaCycle == 0 ) begin 169 | toConvertNega.deq; 170 | Vector#(4,Bit#(32)) d = toConvertNega.first; 171 | Bit#(1) sign = 0; 172 | if ( d[0][31] == 1 ) begin 173 | d[0] = -d[0]; 174 | sign = 1; 175 | end else begin 176 | sign = 0; 177 | end 178 | for ( Bit#(3) i = 0; i < 4; i = i+1 ) begin 179 | convertNegaBuffer[i] <= d[i]; 180 | end 181 | convertNegaCycle <= convertNegaCycle + 1; 182 | toGetResult.enq(d[0]); 183 | signQ.enq(sign); 184 | end else begin 185 | let i = convertNegaCycle; 186 | let d = convertNegaBuffer[i]; 187 | Bit#(1) sign = 0; 188 | if (d[31] == 1) begin 189 | d = -d; 190 | sign = 1; 191 | end else begin 192 | sign = 0; 193 | end 194 | if ( i == 3 ) convertNegaCycle <= 0; 195 | else convertNegaCycle <= convertNegaCycle + 1; 196 | toGetResult.enq(d); 197 | signQ.enq(sign); 198 | end 199 | endrule 200 | 201 | Reg#(Bit#(9)) eBuffer <- mkReg(0); 202 | Reg#(Bit#(2)) getResultCycle <- mkReg(0); 203 | rule getResult; 204 | toGetResult.deq; 205 | signQ.deq; 206 | if ( getResultCycle == 0 ) begin 207 | eQ.deq; 208 | let e = eQ.first; 209 | eBuffer <= e; 210 | Bit#(9) expMax = e - 127; 211 | //$write("%d\n", expMax); 212 | Bit#(32) d = toGetResult.first; 213 | Bit#(32) decomp = 0; 214 | Bit#(32) tmp = 0; 215 | Bit#(1) sign = signQ.first; 216 | 217 | if ( expMax == fromInteger(385) ) begin 218 | //$write("came in to zero case\n"); 219 | tmp = d >> (127 + (32 - 2)); 220 | decomp[30:0] = tmp[30:0]; 221 | decomp[31] = sign; 222 | //Float value = unpack(decomp[i]); 223 | //$write("%d\n", value); 224 | //$write("sign bit is %d\n", sign[i]); 225 | end else begin 226 | if ( expMax < 30 ) begin 227 | //$write("came in to normal case_1\n"); 228 | tmp = d >> ((32 - 2) - expMax); 229 | decomp[30:0] = tmp[30:0]; 230 | decomp[31] = sign; 231 | //Float value = unpack(decomp[i]); 232 | //$write("%d\n", value); 233 | //$write("sign bit is %d\n", sign[i]); 234 | end else begin 235 | //$write("came in to normal case_2\n"); 236 | tmp = d << (expMax - (32 - 2)); 237 | decomp[30:0] = tmp[30:0]; 238 | decomp[31] = sign; 239 | //Float value = unpack(decomp[i]); 240 | //$write("%d\n", value); 241 | end 242 | end 243 | getResultCycle <= getResultCycle + 1; 244 | outputQ.enq(decomp); 245 | end else begin 246 | let e = eBuffer; 247 | Bit#(9) expMax = e - 127; 248 | //$write("%d\n", expMax); 249 | Bit#(32) d = toGetResult.first; 250 | Bit#(32) decomp = 0; 251 | Bit#(32) tmp = 0; 252 | Bit#(1) sign = signQ.first; 253 | 254 | if ( expMax == fromInteger(385) ) begin 255 | //$write("came in to zero case\n"); 256 | tmp = d >> (127 + (32 - 2)); 257 | decomp[30:0] = tmp[30:0]; 258 | decomp[31] = sign; 259 | //Float value = unpack(decomp[i]); 260 | //$write("%d\n", value); 261 | //$write("sign bit is %d\n", sign[i]); 262 | end else begin 263 | if ( expMax < 30 ) begin 264 | //$write("came in to normal case_1\n"); 265 | tmp = d >> ((32 - 2) - expMax); 266 | decomp[30:0] = tmp[30:0]; 267 | decomp[31] = sign; 268 | //Float value = unpack(decomp[i]); 269 | //$write("%d\n", value); 270 | //$write("sign bit is %d\n", sign[i]); 271 | end else begin 272 | //$write("came in to normal case_2\n"); 273 | tmp = d << (expMax - (32 - 2)); 274 | decomp[30:0] = tmp[30:0]; 275 | decomp[31] = sign; 276 | //Float value = unpack(decomp[i]); 277 | //$write("%d\n", value); 278 | end 279 | end 280 | if ( getResultCycle == 3 ) getResultCycle <= 0; 281 | else getResultCycle <= getResultCycle + 1; 282 | outputQ.enq(decomp); 283 | end 284 | endrule 285 | 286 | method Action put(Bit#(ZfpComprTypeSz) data); 287 | inputQ.enq(data); 288 | endmethod 289 | method ActionValue#(Bit#(32)) get; 290 | outputQ.deq; 291 | return outputQ.first; 292 | endmethod 293 | endmodule 294 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/NnFc.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Vector::*; 4 | import BRAMFIFO::*; 5 | 6 | import SimpleFloat::*; 7 | import FloatingPoint::*; 8 | 9 | 10 | interface MacPeIfc; 11 | method Action putInput(Float v, Bit#(8) input_idx); 12 | method Action putWeight(Float w); 13 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) resultGet; 14 | method Bool resultExist; 15 | endinterface 16 | 17 | typedef 3 PeWaysLog; 18 | typedef TExp#(PeWaysLog) PeWays; 19 | 20 | Integer inputDim = 4097; 21 | Integer outputDim = 4096; 22 | Integer period = 64; 23 | 24 | module mkMacPe#(Bit#(PeWaysLog) peIdx) (MacPeIfc); 25 | Reg#(Bit#(32)) cycleCount <- mkReg(0); 26 | rule incCycleCount; 27 | cycleCount <= cycleCount + 1; 28 | endrule 29 | 30 | FIFO#(Float) weightQ <- mkSizedBRAMFIFO(32); 31 | FIFO#(Tuple2#(Float,Bit#(8))) inputQ <- mkSizedBRAMFIFO(32); 32 | FIFOF#(Tuple3#(Float, Bit#(8), Bit#(14))) outputQ <- mkFIFOF; 33 | 34 | FloatTwoOp fmult <- mkFloatMult; 35 | FloatTwoOp fadd <- mkFloatAdd; 36 | 37 | FIFO#(Float) addForwardQ <- mkSizedFIFO(6); 38 | FIFO#(Tuple3#(Bit#(8),Bit#(14),Bit#(16))) partialSumIdxQ2 <- mkSizedBRAMFIFO(512); 39 | FIFO#(Tuple3#(Bit#(8),Bit#(14),Bit#(16))) partialSumIdxQ1 <- mkFIFO; 40 | FIFO#(Float) partialSumQ <- mkSizedBRAMFIFO(512); 41 | FIFO#(Float) partialSumQ2 <- mkFIFO; 42 | 43 | Reg#(Bit#(8)) lastInputIdx <- mkReg(0); 44 | Reg#(Bit#(14)) curOutputIdx <- mkReg(zeroExtend(peIdx)); 45 | Reg#(Bit#(32)) curMacIdx <- mkReg(0); 46 | Reg#(Bit#(14)) curWeightPeriod <- mkReg(0); 47 | Reg#(Bit#(14)) periodCnt <- mkReg(1); 48 | 49 | Reg#(Bit#(32)) procStartCycle <- mkReg(0); 50 | Reg#(Bit#(32)) procCnt <- mkReg(0); 51 | 52 | rule enqMac; 53 | inputQ.deq; 54 | Float inf = tpl_1(inputQ.first); 55 | Bit#(8) ini = tpl_2(inputQ.first); 56 | weightQ.deq; 57 | Float wf = weightQ.first; 58 | //if ( (peIdx == 0) && ((curMacIdx + 1)>>9 >= fromInteger(inputDim))) begin 59 | // $write("current output index: %d\n", curOutputIdx); 60 | //end 61 | 62 | partialSumIdxQ1.enq(tuple3(ini,curOutputIdx,truncate(curMacIdx>>9))); 63 | if ( curWeightPeriod == 0 ) begin 64 | if ( (curMacIdx + 1)>>9 >= fromInteger(inputDim) ) begin 65 | curMacIdx <= 0; 66 | let nextOutIdx = curOutputIdx + fromInteger(valueOf(PeWays)); 67 | if ( nextOutIdx >= fromInteger(period) ) begin 68 | if ( ini == fromInteger(63) ) begin 69 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 70 | curWeightPeriod <= curWeightPeriod + 1; 71 | periodCnt <= periodCnt + 1; 72 | $write( "Round: %d\n", periodCnt ); 73 | end else begin 74 | curOutputIdx <= zeroExtend(peIdx); 75 | end 76 | end else begin 77 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 78 | end 79 | end else begin 80 | if ( (curOutputIdx + fromInteger(valueOf(PeWays))) >= fromInteger(period) ) begin 81 | curOutputIdx <= zeroExtend(peIdx); 82 | curMacIdx <= curMacIdx + 1; 83 | end else begin 84 | curMacIdx <= curMacIdx + 1; 85 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 86 | end 87 | end 88 | end else begin 89 | if ( (curMacIdx + 1)>>9 >= fromInteger(inputDim) ) begin 90 | curMacIdx <= 0; 91 | let nextOutIdx = curOutputIdx + fromInteger(valueOf(PeWays)); 92 | if ( nextOutIdx >= zeroExtend(fromInteger(period)*periodCnt) ) begin 93 | if ( ini == 63 ) begin 94 | if ( periodCnt == 64 ) begin 95 | curOutputIdx <= zeroExtend(peIdx); 96 | end else begin 97 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 98 | curWeightPeriod <= curWeightPeriod + 1; 99 | periodCnt <= periodCnt + 1; 100 | end 101 | $write( "Round: %d\n", periodCnt ); 102 | end else begin 103 | curOutputIdx <= zeroExtend(peIdx) + fromInteger(period)*(periodCnt - 1); 104 | end 105 | end else begin 106 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 107 | end 108 | end else begin 109 | if ( (curOutputIdx + fromInteger(valueOf(PeWays))) >= (fromInteger(period)*periodCnt) ) begin 110 | curOutputIdx <= zeroExtend(peIdx) + fromInteger(period)*(periodCnt - 1); 111 | curMacIdx <= curMacIdx + 1; 112 | end else begin 113 | curMacIdx <= curMacIdx + 1; 114 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 115 | end 116 | end 117 | end 118 | fmult.put(inf, wf); 119 | 120 | if ( procStartCycle == 0 ) procStartCycle <= cycleCount; 121 | procCnt <= procCnt + 1; 122 | 123 | if ( (procCnt & 32'h1ffff) == 32'h1ffff ) begin 124 | Bit#(32) procCycles = cycleCount - procStartCycle; 125 | Bit#(32) dutyCycle = procCycles/procCnt; 126 | $write( "PE %d -- OPs: %d Cycles: %d -> %d Cycles per OP\n", peIdx, procCnt, procCycles, dutyCycle ); 127 | end 128 | 129 | 130 | if ( curMacIdx < 512 ) begin 131 | addForwardQ.enq(unpack(0)); // float '0' 132 | end else begin 133 | partialSumQ2.deq; 134 | addForwardQ.enq(partialSumQ2.first); 135 | end 136 | endrule 137 | 138 | rule enqAdd; 139 | let mr <- fmult.get; 140 | fadd.put(mr,addForwardQ.first); 141 | addForwardQ.deq; 142 | endrule 143 | 144 | rule relayMacResult; 145 | let d <- fadd.get; 146 | partialSumQ.enq(d); 147 | endrule 148 | 149 | rule relayPartialIdx; 150 | partialSumIdxQ1.deq; 151 | partialSumIdxQ2.enq(partialSumIdxQ1.first); 152 | endrule 153 | 154 | rule filterDoneResults; 155 | partialSumIdxQ2.deq; 156 | let psi = partialSumIdxQ2.first; 157 | partialSumQ.deq; 158 | let ps = partialSumQ.first; 159 | if (tpl_3(psi)+1 == fromInteger(inputDim) ) begin 160 | outputQ.enq(tuple3(ps, tpl_1(psi), tpl_2(psi))); 161 | end else begin 162 | partialSumQ2.enq(ps); 163 | end 164 | endrule 165 | 166 | Reg#(Tuple2#(Float,Bit#(8))) inputReplicateReg <- mkReg(?); 167 | Reg#(Bit#(8)) inputReplicateCnt <- mkReg(0); 168 | Reg#(Bit#(3)) cnt <- mkReg(0); 169 | Reg#(Bit#(6)) cnt_t <- mkReg(0); 170 | 171 | FIFO#(Float) weightInQ <- mkSizedBRAMFIFO(32); 172 | FIFO#(Float) weightStoreQ <- mkSizedBRAMFIFO(9); 173 | FIFO#(Tuple2#(Float,Bit#(8))) inputInQ <- mkSizedBRAMFIFO(32); 174 | rule relayInputIn; 175 | if ( inputReplicateCnt == 0 ) begin 176 | inputInQ.deq; 177 | inputQ.enq(inputInQ.first); 178 | inputReplicateReg <= inputInQ.first; 179 | inputReplicateCnt <= 7; 180 | end else begin 181 | inputReplicateCnt <= inputReplicateCnt - 1; 182 | inputQ.enq(inputReplicateReg); 183 | end 184 | endrule 185 | rule relayWeightIn; 186 | if ( cnt_t == 0 ) begin 187 | if ( cnt == fromInteger(7) ) begin 188 | cnt <= 0; 189 | cnt_t <= cnt_t + 1; 190 | weightInQ.deq; 191 | weightQ.enq(weightInQ.first); 192 | weightStoreQ.enq(weightInQ.first); 193 | end else begin 194 | cnt <= cnt + 1; 195 | weightInQ.deq; 196 | weightQ.enq(weightInQ.first); 197 | weightStoreQ.enq(weightInQ.first); 198 | end 199 | end else begin 200 | if ( cnt_t == fromInteger(63) ) begin 201 | if ( cnt == fromInteger(7) ) begin 202 | cnt <= 0; 203 | cnt_t <= 0; 204 | weightStoreQ.deq; 205 | weightQ.enq(weightStoreQ.first); 206 | end else begin 207 | cnt <= cnt + 1; 208 | weightStoreQ.deq; 209 | weightQ.enq(weightStoreQ.first); 210 | end 211 | end else begin 212 | if ( cnt == fromInteger(7) ) begin 213 | cnt <= 0; 214 | cnt_t <= cnt_t + 1; 215 | weightStoreQ.deq; 216 | weightQ.enq(weightStoreQ.first); 217 | weightStoreQ.enq(weightStoreQ.first); 218 | end else begin 219 | cnt <= cnt + 1; 220 | weightStoreQ.deq; 221 | weightQ.enq(weightStoreQ.first); 222 | weightStoreQ.enq(weightStoreQ.first); 223 | end 224 | end 225 | end 226 | endrule 227 | method Action putInput(Float v, Bit#(8) input_idx); 228 | inputInQ.enq(tuple2(v,input_idx)); 229 | endmethod 230 | method Action putWeight(Float w); 231 | weightInQ.enq(w); 232 | endmethod 233 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) resultGet; 234 | outputQ.deq; 235 | return outputQ.first; 236 | endmethod 237 | method Bool resultExist; 238 | return outputQ.notEmpty; 239 | endmethod 240 | endmodule 241 | 242 | 243 | interface NnFcIfc; 244 | method Action dataIn(Float value, Bit#(8) input_idx); 245 | method Action weightIn(Float weight); 246 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) dataOut; 247 | endinterface 248 | 249 | module mkNnFc(NnFcIfc); 250 | Vector#(PeWays, MacPeIfc) pes; 251 | Vector#(PeWays, FIFO#(Float)) weightInQs <- replicateM(mkFIFO); 252 | Vector#(PeWays, FIFO#(Tuple2#(Float,Bit#(8)))) dataInQs <- replicateM(mkFIFO); 253 | Vector#(PeWays, FIFO#(Tuple3#(Float,Bit#(8),Bit#(14)))) resultOutQs <- replicateM(mkFIFO); 254 | 255 | for (Integer i = 0; i < valueOf(PeWays); i=i+1 ) begin 256 | pes[i] <- mkMacPe(fromInteger(i)); 257 | 258 | Reg#(Bit#(16)) weightInIdx <- mkReg(0); 259 | rule forwardWeights; 260 | weightInQs[i].deq; 261 | let w = weightInQs[i].first; 262 | if ( i < valueOf(PeWays)-1 ) begin 263 | weightInQs[i+1].enq(w); 264 | end 265 | 266 | weightInIdx <= weightInIdx + 1; 267 | Bit#(PeWaysLog) target_w = truncate(weightInIdx); 268 | if ( target_w == fromInteger(i) ) begin 269 | pes[i].putWeight(w); 270 | end 271 | endrule 272 | rule forwardInput; 273 | dataInQs[i].deq; 274 | let d = dataInQs[i].first; 275 | if ( i < valueOf(PeWays)-1 ) begin 276 | dataInQs[i+1].enq(d); 277 | end 278 | 279 | pes[i].putInput(tpl_1(d), tpl_2(d)); 280 | endrule 281 | rule forwardResult; 282 | if ( pes[i].resultExist ) begin 283 | let d <- pes[i].resultGet; 284 | resultOutQs[i].enq(d); 285 | end else if ( i < valueOf(PeWays)-1 ) begin 286 | resultOutQs[i+1].deq; 287 | resultOutQs[i].enq(resultOutQs[i+1].first); 288 | end 289 | endrule 290 | end 291 | 292 | method Action dataIn(Float value, Bit#(8) input_idx); 293 | dataInQs[0].enq(tuple2(value,input_idx)); 294 | endmethod 295 | method Action weightIn(Float weight); 296 | weightInQs[0].enq(weight); 297 | endmethod 298 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) dataOut; 299 | resultOutQs[0].deq; 300 | return resultOutQs[0].first; 301 | endmethod 302 | endmodule 303 | -------------------------------------------------------------------------------- /projects/nn_fc_accel/NnFc.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Vector::*; 4 | import BRAMFIFO::*; 5 | 6 | import SimpleFloat::*; 7 | import FloatingPoint::*; 8 | 9 | 10 | interface MacPeIfc; 11 | method Action putInput(Float v, Bit#(8) input_idx); 12 | method Action putWeight(Float w); 13 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) resultGet; 14 | method Bool resultExist; 15 | endinterface 16 | 17 | typedef 3 PeWaysLog; 18 | typedef TExp#(PeWaysLog) PeWays; 19 | 20 | Integer inputDim = 4097; 21 | Integer outputDim = 4096; 22 | Integer period = 64; 23 | 24 | module mkMacPe#(Bit#(PeWaysLog) peIdx) (MacPeIfc); 25 | Reg#(Bit#(32)) cycleCount <- mkReg(0); 26 | rule incCycleCount; 27 | cycleCount <= cycleCount + 1; 28 | endrule 29 | 30 | FIFO#(Float) weightQ <- mkSizedBRAMFIFO(32); 31 | FIFO#(Tuple2#(Float,Bit#(8))) inputQ <- mkSizedBRAMFIFO(32); 32 | FIFOF#(Tuple3#(Float, Bit#(8), Bit#(14))) outputQ <- mkFIFOF; 33 | 34 | FloatTwoOp fmult <- mkFloatMult; 35 | FloatTwoOp fadd <- mkFloatAdd; 36 | 37 | FIFO#(Float) addForwardQ <- mkSizedFIFO(6); 38 | FIFO#(Tuple3#(Bit#(8),Bit#(14),Bit#(16))) partialSumIdxQ2 <- mkSizedBRAMFIFO(512); 39 | FIFO#(Tuple3#(Bit#(8),Bit#(14),Bit#(16))) partialSumIdxQ1 <- mkFIFO; 40 | FIFO#(Float) partialSumQ <- mkSizedBRAMFIFO(512); 41 | FIFO#(Float) partialSumQ2 <- mkFIFO; 42 | 43 | Reg#(Bit#(8)) lastInputIdx <- mkReg(0); 44 | Reg#(Bit#(14)) curOutputIdx <- mkReg(zeroExtend(peIdx)); 45 | Reg#(Bit#(32)) curMacIdx <- mkReg(0); 46 | Reg#(Bit#(14)) curWeightPeriod <- mkReg(0); 47 | Reg#(Bit#(14)) periodCnt <- mkReg(1); 48 | 49 | Reg#(Bit#(32)) procStartCycle <- mkReg(0); 50 | Reg#(Bit#(32)) procCnt <- mkReg(0); 51 | 52 | rule enqMac; 53 | inputQ.deq; 54 | Float inf = tpl_1(inputQ.first); 55 | Bit#(8) ini = tpl_2(inputQ.first); 56 | weightQ.deq; 57 | Float wf = weightQ.first; 58 | //if ( (peIdx == 0) && ((curMacIdx + 1)>>9 >= fromInteger(inputDim))) begin 59 | // $write("current output index: %d\n", curOutputIdx); 60 | //end 61 | 62 | partialSumIdxQ1.enq(tuple3(ini,curOutputIdx,truncate(curMacIdx>>9))); 63 | if ( curWeightPeriod == 0 ) begin 64 | if ( (curMacIdx + 1)>>9 >= fromInteger(inputDim) ) begin 65 | curMacIdx <= 0; 66 | let nextOutIdx = curOutputIdx + fromInteger(valueOf(PeWays)); 67 | if ( nextOutIdx >= fromInteger(period) ) begin 68 | if ( ini == fromInteger(63) ) begin 69 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 70 | curWeightPeriod <= curWeightPeriod + 1; 71 | periodCnt <= periodCnt + 1; 72 | $write( "Round: %d\n", periodCnt ); 73 | end else begin 74 | curOutputIdx <= zeroExtend(peIdx); 75 | end 76 | end else begin 77 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 78 | end 79 | end else begin 80 | if ( (curOutputIdx + fromInteger(valueOf(PeWays))) >= fromInteger(period) ) begin 81 | curOutputIdx <= zeroExtend(peIdx); 82 | curMacIdx <= curMacIdx + 1; 83 | end else begin 84 | curMacIdx <= curMacIdx + 1; 85 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 86 | end 87 | end 88 | end else begin 89 | if ( (curMacIdx + 1)>>9 >= fromInteger(inputDim) ) begin 90 | curMacIdx <= 0; 91 | let nextOutIdx = curOutputIdx + fromInteger(valueOf(PeWays)); 92 | if ( nextOutIdx >= zeroExtend(fromInteger(period)*periodCnt) ) begin 93 | if ( ini == 63 ) begin 94 | if ( periodCnt == 64 ) begin 95 | curOutputIdx <= zeroExtend(peIdx); 96 | end else begin 97 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 98 | curWeightPeriod <= curWeightPeriod + 1; 99 | periodCnt <= periodCnt + 1; 100 | end 101 | $write( "Round: %d\n", periodCnt ); 102 | end else begin 103 | curOutputIdx <= zeroExtend(peIdx) + fromInteger(period)*(periodCnt - 1); 104 | end 105 | end else begin 106 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 107 | end 108 | end else begin 109 | if ( (curOutputIdx + fromInteger(valueOf(PeWays))) >= (fromInteger(period)*periodCnt) ) begin 110 | curOutputIdx <= zeroExtend(peIdx) + fromInteger(period)*(periodCnt - 1); 111 | curMacIdx <= curMacIdx + 1; 112 | end else begin 113 | curMacIdx <= curMacIdx + 1; 114 | curOutputIdx <= curOutputIdx + fromInteger(valueOf(PeWays)); 115 | end 116 | end 117 | end 118 | fmult.put(inf, wf); 119 | 120 | if ( procStartCycle == 0 ) procStartCycle <= cycleCount; 121 | procCnt <= procCnt + 1; 122 | 123 | if ( (procCnt & 32'h1ffff) == 32'h1ffff ) begin 124 | Bit#(32) procCycles = cycleCount - procStartCycle; 125 | Bit#(32) dutyCycle = procCycles/procCnt; 126 | $write( "PE %d -- OPs: %d Cycles: %d -> %d Cycles per OP\n", peIdx, procCnt, procCycles, dutyCycle ); 127 | end 128 | 129 | 130 | if ( curMacIdx < 512 ) begin 131 | addForwardQ.enq(unpack(0)); // float '0' 132 | end else begin 133 | partialSumQ2.deq; 134 | addForwardQ.enq(partialSumQ2.first); 135 | end 136 | endrule 137 | 138 | rule enqAdd; 139 | let mr <- fmult.get; 140 | fadd.put(mr,addForwardQ.first); 141 | addForwardQ.deq; 142 | endrule 143 | 144 | rule relayMacResult; 145 | let d <- fadd.get; 146 | partialSumQ.enq(d); 147 | endrule 148 | 149 | rule relayPartialIdx; 150 | partialSumIdxQ1.deq; 151 | partialSumIdxQ2.enq(partialSumIdxQ1.first); 152 | endrule 153 | 154 | rule filterDoneResults; 155 | partialSumIdxQ2.deq; 156 | let psi = partialSumIdxQ2.first; 157 | partialSumQ.deq; 158 | let ps = partialSumQ.first; 159 | if (tpl_3(psi)+1 == fromInteger(inputDim) ) begin 160 | outputQ.enq(tuple3(ps, tpl_1(psi), tpl_2(psi))); 161 | end else begin 162 | partialSumQ2.enq(ps); 163 | end 164 | endrule 165 | 166 | Reg#(Tuple2#(Float,Bit#(8))) inputReplicateReg <- mkReg(?); 167 | Reg#(Bit#(8)) inputReplicateCnt <- mkReg(0); 168 | Reg#(Bit#(3)) cnt <- mkReg(0); 169 | Reg#(Bit#(6)) cnt_t <- mkReg(0); 170 | 171 | FIFO#(Float) weightInQ <- mkSizedBRAMFIFO(32); 172 | FIFO#(Float) weightStoreQ <- mkSizedBRAMFIFO(9); 173 | FIFO#(Tuple2#(Float,Bit#(8))) inputInQ <- mkSizedBRAMFIFO(32); 174 | rule relayInputIn; 175 | if ( inputReplicateCnt == 0 ) begin 176 | inputInQ.deq; 177 | inputQ.enq(inputInQ.first); 178 | inputReplicateReg <= inputInQ.first; 179 | inputReplicateCnt <= 7; 180 | end else begin 181 | inputReplicateCnt <= inputReplicateCnt - 1; 182 | inputQ.enq(inputReplicateReg); 183 | end 184 | endrule 185 | rule relayWeightIn; 186 | if ( cnt_t == 0 ) begin 187 | if ( cnt == fromInteger(7) ) begin 188 | cnt <= 0; 189 | cnt_t <= cnt_t + 1; 190 | weightInQ.deq; 191 | weightQ.enq(weightInQ.first); 192 | weightStoreQ.enq(weightInQ.first); 193 | end else begin 194 | cnt <= cnt + 1; 195 | weightInQ.deq; 196 | weightQ.enq(weightInQ.first); 197 | weightStoreQ.enq(weightInQ.first); 198 | end 199 | end else begin 200 | if ( cnt_t == fromInteger(63) ) begin 201 | if ( cnt == fromInteger(7) ) begin 202 | cnt <= 0; 203 | cnt_t <= 0; 204 | weightStoreQ.deq; 205 | weightQ.enq(weightStoreQ.first); 206 | end else begin 207 | cnt <= cnt + 1; 208 | weightStoreQ.deq; 209 | weightQ.enq(weightStoreQ.first); 210 | end 211 | end else begin 212 | if ( cnt == fromInteger(7) ) begin 213 | cnt <= 0; 214 | cnt_t <= cnt_t + 1; 215 | weightStoreQ.deq; 216 | weightQ.enq(weightStoreQ.first); 217 | weightStoreQ.enq(weightStoreQ.first); 218 | end else begin 219 | cnt <= cnt + 1; 220 | weightStoreQ.deq; 221 | weightQ.enq(weightStoreQ.first); 222 | weightStoreQ.enq(weightStoreQ.first); 223 | end 224 | end 225 | end 226 | endrule 227 | method Action putInput(Float v, Bit#(8) input_idx); 228 | inputInQ.enq(tuple2(v,input_idx)); 229 | endmethod 230 | method Action putWeight(Float w); 231 | weightInQ.enq(w); 232 | endmethod 233 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) resultGet; 234 | outputQ.deq; 235 | return outputQ.first; 236 | endmethod 237 | method Bool resultExist; 238 | return outputQ.notEmpty; 239 | endmethod 240 | endmodule 241 | 242 | 243 | interface NnFcIfc; 244 | method Action dataIn(Float value, Bit#(8) input_idx); 245 | method Action weightIn(Float weight); 246 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) dataOut; 247 | endinterface 248 | 249 | module mkNnFc(NnFcIfc); 250 | Vector#(PeWays, MacPeIfc) pes; 251 | Vector#(PeWays, FIFO#(Float)) weightInQs <- replicateM(mkFIFO); 252 | Vector#(PeWays, FIFO#(Tuple2#(Float,Bit#(8)))) dataInQs <- replicateM(mkFIFO); 253 | Vector#(PeWays, FIFO#(Tuple3#(Float,Bit#(8),Bit#(14)))) resultOutQs <- replicateM(mkFIFO); 254 | 255 | for (Integer i = 0; i < valueOf(PeWays); i=i+1 ) begin 256 | pes[i] <- mkMacPe(fromInteger(i)); 257 | 258 | Reg#(Bit#(16)) weightInIdx <- mkReg(0); 259 | rule forwardWeights; 260 | weightInQs[i].deq; 261 | let w = weightInQs[i].first; 262 | if ( i < valueOf(PeWays)-1 ) begin 263 | weightInQs[i+1].enq(w); 264 | end 265 | 266 | weightInIdx <= weightInIdx + 1; 267 | Bit#(PeWaysLog) target_w = truncate(weightInIdx); 268 | if ( target_w == fromInteger(i) ) begin 269 | pes[i].putWeight(w); 270 | end 271 | endrule 272 | rule forwardInput; 273 | dataInQs[i].deq; 274 | let d = dataInQs[i].first; 275 | if ( i < valueOf(PeWays)-1 ) begin 276 | dataInQs[i+1].enq(d); 277 | end 278 | 279 | pes[i].putInput(tpl_1(d), tpl_2(d)); 280 | endrule 281 | rule forwardResult; 282 | if ( pes[i].resultExist ) begin 283 | let d <- pes[i].resultGet; 284 | resultOutQs[i].enq(d); 285 | end else if ( i < valueOf(PeWays)-1 ) begin 286 | resultOutQs[i+1].deq; 287 | resultOutQs[i].enq(resultOutQs[i+1].first); 288 | end 289 | endrule 290 | end 291 | 292 | method Action dataIn(Float value, Bit#(8) input_idx); 293 | dataInQs[0].enq(tuple2(value,input_idx)); 294 | endmethod 295 | method Action weightIn(Float weight); 296 | weightInQs[0].enq(weight); 297 | endmethod 298 | method ActionValue#(Tuple3#(Float, Bit#(8), Bit#(14))) dataOut; 299 | resultOutQs[0].deq; 300 | return resultOutQs[0].first; 301 | endmethod 302 | endmodule 303 | -------------------------------------------------------------------------------- /projects/nn_fc_zfpe/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import Clocks :: *; 2 | import Vector::*; 3 | import FIFO::*; 4 | import BRAMFIFO::*; 5 | import Uart::*; 6 | import BRAMSubWord::*; 7 | import PLL::*; 8 | import Sdram::*; 9 | 10 | import Mult18x18D::*; 11 | import SimpleFloat::*; 12 | import FloatingPoint::*; 13 | 14 | import NnFc::*; 15 | import ZfpDecompress::*; 16 | import ZfpCompress::*; 17 | 18 | interface HwMainIfc; 19 | method ActionValue#(Bit#(8)) serial_tx; 20 | method Action serial_rx(Bit#(8) rx); 21 | endinterface 22 | 23 | module mkHwMain#(Ulx3sSdramUserIfc mem) (HwMainIfc); 24 | Clock curclk <- exposeCurrentClock; 25 | Reset currst <- exposeCurrentReset; 26 | 27 | Reg#(Bit#(32)) cycleCount <- mkReg(0); 28 | rule incCycleCount; 29 | cycleCount <= cycleCount + 1; 30 | endrule 31 | 32 | FIFO#(Bit#(8)) serialrxQ <- mkFIFO; 33 | FIFO#(Bit#(8)) serialtxQ <- mkFIFO; 34 | 35 | ZfpCompressIfc compressorO <- mkZfpCompress; 36 | ZfpDecompressIfc decompressorW <- mkZfpDecompress; 37 | ZfpDecompressIfc decompressorI <- mkZfpDecompress; 38 | NnFcIfc nn <- mkNnFc; 39 | 40 | Reg#(Maybe#(Bit#(8))) inputDst <- mkReg(tagged Invalid); 41 | rule recvInputDst(!isValid(inputDst)); 42 | Bit#(8) charin = serialrxQ.first(); 43 | serialrxQ.deq; 44 | inputDst <= tagged Valid charin; 45 | endrule 46 | Reg#(Bit#(40)) inputBuffer <- mkReg(0); 47 | Reg#(Bit#(40)) weightBuffer <- mkReg(0); 48 | Reg#(Bit#(3)) inputBufferCnt <- mkReg(0); 49 | Reg#(Bit#(3)) weightBufferCnt <- mkReg(0); 50 | FIFO#(Bit#(32)) memWriteWeightQ <- mkFIFO; //FIFO for storing weight values to memory 51 | FIFO#(Bit#(32)) memWriteInputQ <- mkFIFO; //FIFO for storing input values to memory 52 | rule recvInputFloat(isValid(inputDst)); 53 | Bit#(8) charin = serialrxQ.first(); 54 | serialrxQ.deq; 55 | let id = fromMaybe(?,inputDst); 56 | 57 | if ( id != 8'hff ) begin 58 | Bit#(40) nv_1 = (inputBuffer>>8)|(zeroExtend(charin)<<32); 59 | inputBuffer <= nv_1; 60 | inputDst <= tagged Invalid; 61 | if ( inputBufferCnt == 4 ) begin 62 | memWriteInputQ.enq(truncate(nv_1)); 63 | inputBufferCnt <= 0; 64 | end else begin 65 | inputBufferCnt <= inputBufferCnt + 1; 66 | end 67 | end else begin 68 | Bit#(40) nv_2 = (weightBuffer>>8)|(zeroExtend(charin)<<32); 69 | weightBuffer <= nv_2; 70 | inputDst <= tagged Invalid; 71 | if ( weightBufferCnt == 4 ) begin 72 | memWriteWeightQ.enq(truncate(nv_2)); 73 | Bit#(9) test = truncate(nv_2); 74 | weightBufferCnt <= 0; 75 | end else begin 76 | weightBufferCnt <= weightBufferCnt + 1; 77 | end 78 | end 79 | endrule 80 | 81 | rule putOutputtoComp; 82 | let out <- nn.dataOut; // tpl_1 = Float, tpl_2 = input idx, tpl_3 = output idx 83 | compressorO.put(pack(tpl_1(out))); 84 | endrule 85 | Reg#(Maybe#(Bit#(16))) memWriteOutputBuffer <- mkReg(tagged Invalid); 86 | Reg#(Bit#(24)) memWriteOutputAddr <- mkReg(8523776); 87 | Reg#(Bool) memWriteOutputDone <- mkReg(False); 88 | rule procMemWriteOutput; //8523776 ~ 8654847 (Memory Space for Output) 89 | if ( isValid(memWriteOutputBuffer) ) begin 90 | memWriteOutputBuffer <= tagged Invalid; 91 | mem.req(memWriteOutputAddr,fromMaybe(?,memWriteOutputBuffer),True); 92 | if ( memWriteOutputAddr + 1 == fromInteger(8654848) ) begin 93 | memWriteOutputDone <= True; 94 | $write("Finished saving output\n"); 95 | end 96 | end else begin 97 | let r <- compressorO.get; 98 | mem.req(memWriteOutputAddr,truncate(r),True); 99 | memWriteOutputBuffer <= tagged Valid truncate(r>>16); 100 | end 101 | memWriteOutputAddr <= memWriteOutputAddr + 1; 102 | endrule 103 | 104 | Reg#(Bit#(24)) memReadOutputAddr <- mkReg(8523776); 105 | FIFO#(Bit#(16)) memReadOutputQ <- mkSizedBRAMFIFO(64); 106 | rule procMemReadOutputReq( memWriteOutputDone ); 107 | if ( memReadOutputAddr + 1 == memWriteOutputAddr ) memReadOutputAddr <= 0; 108 | else memReadOutputAddr <= memReadOutputAddr + 1; 109 | mem.req(memReadOutputAddr,?,False); 110 | endrule 111 | rule procMemReadOutputResp( memWriteOutputDone ); 112 | let d <- mem.readResp; 113 | memReadOutputQ.enq(d); 114 | endrule 115 | 116 | Reg#(Bit#(8)) memReadOutputBuffer <- mkReg(0); 117 | Reg#(Bit#(3)) memReadOutputBufferCnt <- mkReg(0); 118 | rule serializeOutput; 119 | if ( memReadOutputBufferCnt > 0 ) begin 120 | serialtxQ.enq(memReadOutputBuffer); 121 | memReadOutputBufferCnt <= 0; 122 | end else begin 123 | memReadOutputQ.deq; 124 | let d = memReadOutputQ.first; 125 | serialtxQ.enq(truncate(d)); 126 | memReadOutputBuffer <= truncateLSB(d); 127 | memReadOutputBufferCnt <= memReadOutputBufferCnt + 1; 128 | end 129 | endrule 130 | 131 | 132 | Reg#(Maybe#(Bit#(16))) memWriteWeightBuffer <- mkReg(tagged Invalid); 133 | Reg#(Maybe#(Bit#(16))) memWriteInputBuffer <- mkReg(tagged Invalid); 134 | Reg#(Bit#(24)) memWriteWeightAddr <- mkReg(0); 135 | Reg#(Bit#(24)) memWriteInputAddr <- mkReg(8390656); 136 | Reg#(Bool) memWriteWeightDone <- mkReg(False); 137 | Reg#(Bool) memWriteDone <- mkReg(False); 138 | rule procMemWriteWeight; //0 ~ 8390655 (Memory Space for Weight & bias) 139 | if ( isValid(memWriteWeightBuffer) ) begin 140 | memWriteWeightBuffer <= tagged Invalid; 141 | mem.req(memWriteWeightAddr,fromMaybe(?,memWriteWeightBuffer),True); 142 | if ( memWriteWeightAddr + 1 == fromInteger(8390656) ) begin 143 | memWriteWeightDone <= True; 144 | $write("Finished saving weight and bias values\n"); 145 | end 146 | end else begin 147 | memWriteWeightQ.deq; 148 | let d = memWriteWeightQ.first; 149 | mem.req(memWriteWeightAddr,truncate(d),True); 150 | memWriteWeightBuffer <= tagged Valid truncate(d>>16); 151 | end 152 | memWriteWeightAddr <= memWriteWeightAddr + 1; 153 | //$write("%d\n", memWriteWeightAddr); 154 | endrule 155 | rule procMemWriteInput;//8390656 ~ 8521727 (Memory Space for Input) 156 | if ( isValid(memWriteInputBuffer) ) begin 157 | memWriteInputBuffer <= tagged Invalid; 158 | mem.req(memWriteInputAddr,fromMaybe(?,memWriteInputBuffer),True); 159 | if ( memWriteInputAddr + 1 == fromInteger(8523776) ) begin 160 | memWriteDone <= True; 161 | $write("Finished saving input values\n"); 162 | end 163 | end else begin 164 | memWriteInputQ.deq; 165 | let d = memWriteInputQ.first; 166 | mem.req(memWriteInputAddr,truncate(d),True); 167 | memWriteInputBuffer <= tagged Valid truncate(d>>16); 168 | end 169 | memWriteInputAddr <= memWriteInputAddr + 1; 170 | endrule 171 | 172 | FIFO#(Bit#(16)) memReadWeightQ <- mkSizedBRAMFIFO(128); 173 | FIFO#(Bit#(16)) memReadInputQ <- mkSizedBRAMFIFO(128); 174 | FIFO#(Bit#(1)) memReadDstQ <- mkSizedBRAMFIFO(64); 175 | 176 | Reg#(Bit#(24)) memReadWeightAddr <- mkReg(0); 177 | Reg#(Bit#(24)) memReadInputAddr <- mkReg(8390656); 178 | 179 | Reg#(Bit#(8)) weightCntUp <- mkReg(0); 180 | Reg#(Bit#(8)) weightCntDn <- mkReg(0); 181 | Reg#(Bit#(8)) inputCntUp <- mkReg(0); 182 | Reg#(Bit#(8)) inputCntDn <- mkReg(0); 183 | 184 | Reg#(Bit#(8)) cntW <- mkReg(0); 185 | Reg#(Bit#(8)) cntI <- mkReg(0); 186 | 187 | Reg#(Bit#(1)) memReadDst <- mkReg(0); 188 | 189 | rule procMemReadWeightReq( (memReadDst == 0) && (weightCntUp - weightCntDn < 128) && memWriteDone ); 190 | if ( memReadWeightAddr + 1 == memWriteWeightAddr ) memReadWeightAddr <= 0; 191 | else memReadWeightAddr <= memReadWeightAddr + 1; 192 | mem.req(memReadWeightAddr,?,False); 193 | if ( cntW + 1 == fromInteger(64) ) begin 194 | memReadDst <= 1; 195 | cntW <= 0; 196 | end else begin 197 | cntW <= cntW + 1; 198 | end 199 | memReadDstQ.enq(0); 200 | //$write("weight stack: %d\n", (weightCntUp - weightCntDn)); 201 | endrule 202 | rule procMemReadInputReq( (memReadDst == 1) && (inputCntUp - inputCntDn < 128) ); 203 | if ( memReadInputAddr + 1 == memWriteInputAddr ) memReadInputAddr <= fromInteger(8390656); 204 | else memReadInputAddr <= memReadInputAddr + 1; 205 | mem.req(memReadInputAddr,?,False); 206 | if ( cntI + 1 == fromInteger(64) ) begin 207 | memReadDst <= 0; 208 | cntI <= 0; 209 | end else begin 210 | cntI <= cntI + 1; 211 | end 212 | memReadDstQ.enq(1); 213 | //$write("input stack: %d\n", (inputCntUp - inputCntDn)); 214 | endrule 215 | Reg#(Bit#(32)) memReadCnt <- mkReg(0); 216 | rule procMemReadResp; 217 | let d <- mem.readResp; 218 | memReadDstQ.deq; 219 | memReadCnt <= memReadCnt + 1; 220 | 221 | if ( (memReadCnt&32'h7ffff) == 0 ) begin 222 | $write( "Debug Mem Read: %d cycles -- %d mem Reads\n", cycleCount, memReadCnt ); 223 | end 224 | 225 | if ( memReadDstQ.first == 0 ) begin 226 | memReadWeightQ.enq(d); 227 | weightCntUp <= weightCntUp + 1; 228 | end else begin 229 | memReadInputQ.enq(d); 230 | inputCntUp <= inputCntUp + 1; 231 | end 232 | endrule 233 | 234 | Reg#(Bit#(32)) memReadWeightBuffer <- mkReg(0); 235 | Reg#(Bit#(2)) memReadWeightBufferCnt <- mkReg(0); 236 | rule putWeighttoDecomp; 237 | memReadWeightQ.deq; 238 | weightCntDn <= weightCntDn + 1; 239 | let d = memReadWeightQ.first; 240 | Bit#(32) nv = (memReadWeightBuffer>>16)|(zeroExtend(d)<<16); 241 | memReadWeightBuffer <= nv; 242 | 243 | if ( memReadWeightBufferCnt == 1 ) begin 244 | memReadWeightBufferCnt <= 0; 245 | decompressorW.put(zeroExtend(nv)); 246 | end else begin 247 | memReadWeightBufferCnt <= memReadWeightBufferCnt + 1; 248 | end 249 | endrule 250 | 251 | Reg#(Bit#(32)) memReadInputBuffer <- mkReg(0); 252 | Reg#(Bit#(2)) memReadInputBufferCnt <- mkReg(0); 253 | rule putInputtoDecomp; 254 | memReadInputQ.deq; 255 | inputCntDn <= inputCntDn + 1; 256 | let d = memReadInputQ.first; 257 | Bit#(32) nv = (memReadInputBuffer>>16)|(zeroExtend(d)<<16); 258 | memReadInputBuffer <= nv; 259 | 260 | if ( memReadInputBufferCnt == 1 ) begin 261 | memReadInputBufferCnt <= 0; 262 | decompressorI.put(zeroExtend(nv)); 263 | end else begin 264 | memReadInputBufferCnt <= memReadInputBufferCnt + 1; 265 | end 266 | endrule 267 | 268 | rule serializeWeight; 269 | let d <- decompressorW.get; 270 | nn.weightIn(unpack(d)); 271 | endrule 272 | Reg#(Bit#(8)) inputIdxCycle <- mkReg(0); 273 | rule serializeInput; 274 | let d <- decompressorI.get; 275 | nn.dataIn(unpack(d),inputIdxCycle); 276 | if ( inputIdxCycle == 63 ) inputIdxCycle <= 0; 277 | else inputIdxCycle <= inputIdxCycle + 1; 278 | 279 | endrule 280 | 281 | method ActionValue#(Bit#(8)) serial_tx; 282 | serialtxQ.deq; 283 | return serialtxQ.first(); 284 | endmethod 285 | method Action serial_rx(Bit#(8) d); 286 | serialrxQ.enq(d); 287 | endmethod 288 | endmodule 289 | --------------------------------------------------------------------------------