├── .gitignore ├── Makefile ├── README.md ├── add_float.t.v ├── add_float.v ├── approx_sigmoid.py ├── comp_float.t.v ├── comp_float.v ├── conversion ├── Makefile ├── f2h.cpp ├── gen_data.cpp └── h2f.cpp ├── data ├── b1.txt ├── b2.txt ├── w1.txt └── w2.txt ├── div_float.v ├── documentation.md ├── experiments ├── allset.v └── exp.v ├── filters ├── net.filter └── sigmoid.filter ├── guard.bash ├── input_conditioner.v ├── lerp.v ├── matmul.t.v ├── matmul.v ├── mul_float.t.v ├── mul_float.v ├── net.t.v ├── net.v ├── net_wrapper.t.v ├── net_wrapper.v ├── polyfit.py ├── sigmoid.t.v ├── sigmoid.v └── x.tcl /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/c++,vim,git,linux,python,vivado 3 | 4 | ### C++ ### 5 | # Prerequisites 6 | *.d 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | 39 | ### Vim ### 40 | # swap 41 | [._]*.s[a-w][a-z] 42 | [._]s[a-w][a-z] 43 | # session 44 | Session.vim 45 | # temporary 46 | .netrwhist 47 | *~ 48 | # auto-generated tag files 49 | tags 50 | 51 | 52 | ### Git ### 53 | *.orig 54 | 55 | 56 | ### Linux ### 57 | 58 | # temporary files which can be created if a process still has a handle open of a deleted file 59 | .fuse_hidden* 60 | 61 | # KDE directory preferences 62 | .directory 63 | 64 | # Linux trash folder which might appear on any partition or disk 65 | .Trash-* 66 | 67 | # .nfs files are created when an open file is removed but is still being accessed 68 | .nfs* 69 | 70 | 71 | ### Python ### 72 | # Byte-compiled / optimized / DLL files 73 | __pycache__/ 74 | *.py[cod] 75 | *$py.class 76 | 77 | # C 
extensions 78 | 79 | # Distribution / packaging 80 | .Python 81 | env/ 82 | build/ 83 | develop-eggs/ 84 | dist/ 85 | downloads/ 86 | eggs/ 87 | .eggs/ 88 | lib/ 89 | lib64/ 90 | parts/ 91 | sdist/ 92 | var/ 93 | *.egg-info/ 94 | .installed.cfg 95 | *.egg 96 | 97 | # PyInstaller 98 | # Usually these files are written by a python script from a template 99 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 100 | *.manifest 101 | *.spec 102 | 103 | # Installer logs 104 | pip-log.txt 105 | pip-delete-this-directory.txt 106 | 107 | # Unit test / coverage reports 108 | htmlcov/ 109 | .tox/ 110 | .coverage 111 | .coverage.* 112 | .cache 113 | nosetests.xml 114 | coverage.xml 115 | *,cover 116 | .hypothesis/ 117 | 118 | # Translations 119 | *.mo 120 | *.pot 121 | 122 | # Django stuff: 123 | *.log 124 | local_settings.py 125 | 126 | # Flask stuff: 127 | instance/ 128 | .webassets-cache 129 | 130 | # Scrapy stuff: 131 | .scrapy 132 | 133 | # Sphinx documentation 134 | docs/_build/ 135 | 136 | # PyBuilder 137 | target/ 138 | 139 | # Jupyter Notebook 140 | .ipynb_checkpoints 141 | 142 | # pyenv 143 | .python-version 144 | 145 | # celery beat schedule file 146 | celerybeat-schedule 147 | 148 | # dotenv 149 | .env 150 | 151 | # virtualenv 152 | .venv/ 153 | venv/ 154 | ENV/ 155 | 156 | # Spyder project settings 157 | .spyderproject 158 | 159 | # Rope project settings 160 | .ropeproject 161 | 162 | 163 | ### Vivado ### 164 | ######################################################################################################### 165 | ## This is an example .gitignore file for Vivado, please treat it as an example as 166 | ## it might not be complete. In addition, XAPP 1165 should be followed. 
167 | ######################################################################################################### 168 | ######### 169 | #Exclude all 170 | ######### 171 | !*/ 172 | !.gitignore 173 | ########################################################################### 174 | ## VIVADO 175 | ########################################################################### 176 | ######### 177 | #Source files: 178 | ######### 179 | #Do NOT ignore VHDL, Verilog, block diagrams or EDIF files. 180 | !*.vhd 181 | !*.v 182 | !*.bd 183 | !*.edif 184 | ######### 185 | #IP files 186 | ######### 187 | #.xci: synthesis and implemented not possible - you need to return back to the previous version to generate output products 188 | #.xci + .dcp: implementation possible but not re-synthesis 189 | #*.xci(www.spiritconsortium.org) 190 | !*.xci 191 | #*.dcp(checkpoint files) 192 | !*.dcp 193 | !*.vds 194 | !*.pb 195 | #All bd comments and layout coordinates are stored within .ui 196 | !*.ui 197 | !*.ooc 198 | ######### 199 | #System Generator 200 | ######### 201 | !*.mdl 202 | !*.slx 203 | !*.bxml 204 | ######### 205 | #Simulation logic analyzer 206 | ######### 207 | !*.wcfg 208 | !*.coe 209 | ######### 210 | #MIG 211 | ######### 212 | !*.prj 213 | !*.mem 214 | ######### 215 | #Project files 216 | ######### 217 | #XPR + *.XML ? 
# Builds one Icarus Verilog simulation image per test bench (*.t.v).
# Each test bench `include`s the module under test, so only the test bench
# is passed to iverilog; the listed prerequisites exist purely so that make
# rebuilds the image when a source it depends on changes.

# None of these targets produce a file with their own name.
.PHONY: all run clean

all : mul_float.o add_float.o comp_float.o matmul.o sigmoid.o net.o net_wrapper.o

# Build everything, then execute every simulation in dependency order.
run: all
	@echo "===== -- mul_float -- ====="
	./mul_float.o
	@echo "===== -- add_float -- ====="
	./add_float.o
	@echo "===== -- comp_float -- ====="
	./comp_float.o
	@echo "===== -- matmul -- ====="
	./matmul.o
	@echo "===== -- sigmoid -- ====="
	./sigmoid.o
	@echo "===== -- net -- ====="
	./net.o
	@echo "===== -- net_wrapper -- ====="
	./net_wrapper.o

mul_float.o : mul_float.v mul_float.t.v
	iverilog mul_float.t.v -o mul_float.o

add_float.o : add_float.v add_float.t.v
	iverilog add_float.t.v -o add_float.o

comp_float.o : comp_float.v comp_float.t.v
	iverilog comp_float.t.v -o comp_float.o

matmul.o : matmul.v matmul.t.v mul_float.v add_float.v
	iverilog matmul.t.v -o matmul.o

sigmoid.o : sigmoid.v sigmoid.t.v add_float.v mul_float.v div_float.v
	iverilog sigmoid.t.v -o sigmoid.o

# `time` reports how long elaboration of the full network takes.
net.o : net.v net.t.v sigmoid.o matmul.o
	time iverilog net.t.v -o net.o

net_wrapper.o : net_wrapper.v net_wrapper.t.v net.o
	iverilog net_wrapper.t.v -o net_wrapper.o

# -f keeps `make clean` from failing when nothing has been built yet.
clean:
	rm -f *.o
28 | 29 | Based on the feedback from this meeting, you will revise your proposal and submit the final version including a work plan the following day. 30 | 31 | ## Documentation (55%) 32 | The documentation counts for 55% of your grade whether you succeed at your goal or not. Did you shoot for the moon and land among the harsh vacuum of space? You still learned something from the process, and as long as you document it well, you will get full credit. 33 | 34 | Documentation should be posted in the form of a project website (PDF or MarkDown in a repo can also be acceptable depending on the project) and must answer the following questions: 35 | 36 | ### What did you do? 37 | Your project abstract: one catchy sentence followed by a paragraph or two. The intended audience should include people that aren't necessarily versed in Computer Architecture, but are technically competent. 38 | ### Why did you do it? 39 | A paragraph or so about why the project you chose is worthwhile and interesting. 40 | ### How did you do it? 41 | This portion can assume an audience that has taken Computer Architecture, but don't let the story you’re telling get bogged down by buzzwords. A sure sign of a bad engineer is ORA (over reliance on acronyms). 42 | 43 | ### How can someone else build on it? 44 | Include everything necessary to pick up where you left off. This should include (as appropriate): 45 | 46 | - code 47 | - schematics 48 | - scripts and build instructions 49 | - proper attribution for resources used and anything you did not write yourself 50 | - list of difficulties and ‘gotchas’ while doing this project 51 | - reflection on the project as a whole as well as your work plan 52 | - possible TODOs to extend the depth of the project 53 | 54 | This should all be posted somewhere accessible, e.g. your project webpage or repository. 
Please do not literally include these question prompts and then answer them (you're better than that) - instead, use them to check that you've covered all the bases as you tell the story in the way that best makes sense for your project. 55 | 56 | ## Choosing and Achieving your Goal (30%) 57 | There is a lot of flexibility available in what your actual final project can be. As a first pass, it needs to satisfy the following criteria: 58 | 59 | 1. Build upon what we have learned in class this semester or other "Computer Architecture" topics 60 | 1. Have well-defined criteria for when it is finished and successful 61 | 1. Be achievable within the time allotted 62 | 63 | ## Possible broad directions: 64 | 65 | - Extending something you started in Computer Architecture 66 | - Teaching somebody something cool about Computer Architecture 67 | - Something useful to someone that uses Computer Architecture 68 | - Something that needs the skills learned in Computer Architecture 69 | - Something that you can present at Expo that will make people want to take Computer Architecture 70 | 71 | Append one of the following phrases to a cool project idea to make it more CompArch-y: 72 | 73 | - ... with an FPGA 74 | - ... in assembly 75 | - ... on a GPU 76 | - ... inside a nested series of black boxes 77 | - ... hardware accelerated 78 | 79 | As you put your project plans together, remember that a major portion of the project is communicating it to others. 80 | 81 | ## Demo (5%) 82 | We’ll present your project work during the time blocked out for "final exam" period – December 15 from 12 – 3PM. This is mainly an opportunity to show off and celebrate your great work (small percentage of overall grade), and the details are up to you. 83 | 84 | The "default" option is a poster version of your project documentation (along with a running live demo if appropriate), so that folks can walk around in a studio session and see what you did. 
Maybe you feel that a presentation is more appropriate for your project work. Perhaps a tutorial session with everyone participating makes the most sense. It could be that only a puppet show truly captures the essence of your project. Think about final demo format as you put together your proposal, but you don't need to make a final decision just yet. 85 | 86 | Good luck, and have fun! 87 | 88 | 89 | -------------------------------------------------------------------------------- /add_float.t.v: -------------------------------------------------------------------------------- 1 | //`ifndef __ADD_FLOAT_T_V__ 2 | //`define __ADD_FLOAT_T_V__ 3 | //`include "add_float.v" 4 | // 5 | //module test_add(); 6 | // 7 | // 8 | //reg rst_n, clk=0, start; 9 | //reg [31:0] a, b; 10 | //wire [31:0] o; 11 | //wire nan, overflow, underflow, zero, done; 12 | // 13 | //add_float #(.FLOAT_WIDTH(32)) add(rst_n, clk, start, 1'b0, a, b, o, nan, overflow, underflow, zero, done); 14 | // 15 | //always begin 16 | // #10 17 | // clk = !clk; 18 | //end 19 | // 20 | //always @(posedge done) begin 21 | // $display("%H %H %H %d", a, b, o, $time); 22 | //end 23 | // 24 | // 25 | //initial begin 26 | // $display("a b o"); 27 | // check(32'h40a00000, 32'h40000000); 28 | // #500 29 | // check(32'h400g0000, 32'h3f800000); 30 | // #500 31 | // $finish; 32 | //end 33 | // 34 | //task check; 35 | // input [31:0] lhs, rhs; 36 | // begin 37 | // rst_n = 1'b0; 38 | // @(negedge clk); 39 | // a = lhs; 40 | // b = rhs; 41 | // start = 1; 42 | // @(negedge clk); 43 | // start = 0; 44 | // rst_n = 1'b1; 45 | // @(posedge done); 46 | // end 47 | //endtask 48 | // 49 | //endmodule 50 | //`endif 51 | // 52 | `ifndef __ADD_FLOAT_T_V__ 53 | `define __ADD_FLOAT_T_V__ 54 | `include "add_float.v" 55 | 56 | module test_add(); 57 | 58 | reg rst_n, clk=0, start; 59 | reg [31:0] a, b; 60 | wire [31:0] o; 61 | wire nan, overflow, underflow, zero, done; 62 | 63 | add_float #(.FLOAT_WIDTH(32)) add(rst_n, clk, start, 1'b0, a, 
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)).

    Works element-wise on numpy arrays as well as on plain scalars.
    """
    return 1.0 / (1.0 + np.exp(-x))


def fast_sigmoid(x):
    """Return the division-based sigmoid approximation 0.5 * (1 + x / (1 + |x|)).

    Evaluation pipeline:
        x -> |x| -> 1 + |x| -> x / (1 + |x|) -> 1 + (...) -> 0.5 * (...)
    """
    return 0.5 * (1 + x / (1.0 + np.abs(x)))


def main(argv=None):
    """Command-line entry point.

    Usage:
        approx_sigmoid.py plot     plot sigmoid, fast_sigmoid and a degree-5 polyfit
        approx_sigmoid.py <x>      print both functions evaluated at the float <x>

    Args:
        argv: argument vector including the program name; defaults to sys.argv.

    Returns:
        0 on success, 2 when the argument is missing or is not a number.
    """
    if argv is None:
        argv = sys.argv

    usage = 'USAGE : %s plot | <x>\n' % argv[0]

    # The original indexed argv[1] unconditionally and crashed with IndexError.
    if len(argv) < 2:
        sys.stderr.write(usage)
        return 2

    if argv[1].lower() == 'plot':
        # Imported lazily so that the numeric mode works without matplotlib
        # (and without a display) being available.
        from matplotlib import pyplot as plt

        xs = np.linspace(-10, 10, 200)
        ys = sigmoid(xs)
        f = np.poly1d(np.polyfit(xs, ys, 5))

        plt.plot(xs, ys)
        plt.plot(xs, fast_sigmoid(xs))
        plt.plot(xs, f(xs))

        plt.title('Approximated Sigmoid')
        plt.legend(['sigmoid', 'fast sigmoid', 'polyfit'], loc='lower right')

        plt.show()
        return 0

    try:
        x = float(argv[1])
    except ValueError:
        sys.stderr.write(usage)
        return 2

    # %-formatting prints identically under Python 2 and Python 3, unlike the
    # original Python-2-only print statements.
    print('sigmoid : %s' % sigmoid(x))
    print('approx : %s' % fast_sigmoid(x))
    return 0


if __name__ == "__main__":
    sys.exit(main())
// Combinational comparator for two 32-bit words laid out as
// {sign[31], exponent[30:23], mantissa[22:0]}.
//
// flag encoding (one-hot):
//   3'b100 : a >  b
//   3'b010 : a == b
//   3'b001 : a <  b
//
// Fixes relative to the previous revision:
//   * operands of opposite sign were reported the wrong way round
//     (a negative, b positive yielded 3'b100, i.e. "a > b");
//   * +0 and -0 now compare equal instead of being ordered by sign.
// Exponent 8'hFF (infinity / NaN encodings) is not special-cased: such
// operands are ordered purely by their bit pattern.
module comp_float(
    output [2:0] flag,
    input [31:0] a,
    input [31:0] b
);

    localparam [2:0] A_GREATER = 3'b100,
                     A_EQUAL   = 3'b010,
                     A_LESS    = 3'b001;

    wire s1, s2;
    wire [7:0] e1, e2;
    wire [22:0] m1, m2;

    assign {s1, e1, m1} = a;
    assign {s2, e2, m2} = b;

    // {exponent, mantissa} read as an unsigned integer orders magnitudes the
    // same way as comparing the exponents first and the mantissas second.
    wire [30:0] mag1 = {e1, m1};
    wire [30:0] mag2 = {e2, m2};

    wire both_zero = (mag1 == 31'b0) && (mag2 == 31'b0);
    wire mag_gt    = mag1 > mag2;
    wire mag_lt    = mag1 < mag2;

    assign flag = both_zero  ? A_EQUAL :
                  // opposite signs: the operand with the sign bit set is smaller
                  (s1 != s2) ? (s1 ? A_LESS : A_GREATER) :
                  // same sign: a larger magnitude wins for positives, loses for negatives
                  mag_gt     ? (s1 ? A_LESS : A_GREATER) :
                  mag_lt     ? (s1 ? A_GREATER : A_LESS) :
                               A_EQUAL;
endmodule
// Sequential floating-point divider: out_reg = op1 / op2.
//
// Parameters
//   FLOAT_WIDTH : 64 selects an 11-bit exponent / 52-bit fraction layout,
//                 any other value selects 8-bit exponent / 23-bit fraction.
// Ports
//   rst_n  : asynchronous active-low reset of the iteration state.
//   start  : synchronous restart; while high the iteration state is cleared.
//   op1/op2: dividend / divisor, must be held stable until done_reg rises.
//   out_reg: quotient, written on the same clock edge that sets done_reg.
//   nan_reg, overflow_reg, underflow_reg, zero_reg, divizion_by_zero_reg:
//            combinational status flags derived from op1/op2.
//
// Operation: one cycle to load the aligned significands, FRACTION_WIDTH + 2
// compare/subtract/shift cycles (restoring division, one quotient bit per
// cycle), then one cycle to assemble the result.
//
// Changes relative to the previous revision:
//   * zero_out tested zero1 twice and was therefore constant 0; it now
//     checks the divisor (zero2), so 0 / x returns 0.
//   * overflow_reg was derived from result_exp_reg while result_exp_reg was
//     selected by overflow_reg (combinational loop). Overflow is now taken
//     from the uncorrected exponent.
//   * zero_reg and divizion_by_zero_reg were never driven; they now mirror
//     zero_out and inf_out respectively.
//
// Operands with an all-zero exponent field are treated as zero. Apart from
// NaN, inf/inf, 0/0, x/0 and 0/x no other special values are handled.
module div_float
#(parameter
    FLOAT_WIDTH = 64
)
(
    input wire rst_n, clk, start,
    input wire [FLOAT_WIDTH - 1: 0] op1, op2,
    output reg [FLOAT_WIDTH - 1: 0] out_reg,
    output reg divizion_by_zero_reg,
    output reg nan_reg,
    output reg overflow_reg,
    output reg underflow_reg,
    output reg zero_reg,
    output reg done_reg
);
    localparam EXP_WIDTH = (FLOAT_WIDTH == 64) ? 11: 8;
    localparam FRACTION_WIDTH = (FLOAT_WIDTH == 64) ? 52: 23;
    localparam FULL_FRACTION_WIDTH = 2 * FRACTION_WIDTH + 1;
    localparam SIGN_BIT = FLOAT_WIDTH - 1;
    localparam EXP_MSB = SIGN_BIT - 1;
    localparam EXP_LSB = EXP_MSB - EXP_WIDTH + 1;
    localparam EXP_SHIFT = (2 ** (EXP_WIDTH - 1)) - 1;   // exponent bias
    localparam EXP_MAX = (2 ** (EXP_WIDTH)) - 1;         // all-ones exponent
    localparam FRACTION_MSB = EXP_LSB - 1;
    localparam DIV_COUNTER_WIDTH = (FLOAT_WIDTH == 64) ? 7: 6;
    localparam NAN_VALUE = (FLOAT_WIDTH == 64) ? 64'hFFF8_0000_0000_0000: 32'hFFC0_0000;
    localparam INF_VALUE = (FLOAT_WIDTH == 64) ? 64'h7FF0_0000_0000_0000: 32'h7F80_0000;

    wire [FRACTION_MSB: 0] frac1 = op1[FRACTION_MSB: 0],
                           frac2 = op2[FRACTION_MSB: 0];

    reg [FRACTION_WIDTH: 0] result_frac_reg;

    reg [FULL_FRACTION_WIDTH - 1: 0] op1frac_stage_reg, op2frac_stage_reg;

    // When the dividend fraction is smaller than the divisor fraction the
    // divisor is loaded pre-shifted right by one bit and the result exponent
    // is decremented by one to compensate.
    wire op2_aligned = frac1 < frac2;

    // ---------------- exponent calculation ----------------
    wire [EXP_WIDTH - 1: 0] exp1 = op1[EXP_MSB: EXP_LSB],
                            exp2 = op2[EXP_MSB: EXP_LSB];

    wire [EXP_WIDTH: 0] result_exp_before_correction, temp_result;

    reg [EXP_WIDTH: 0] result_exp_reg;

    wire zero1 = exp1 == 0,
         exp1_max = exp1 == EXP_MAX,
         frac1_zero = frac1 == 0,
         inf1 = exp1_max & frac1_zero,
         nan1 = exp1_max & !frac1_zero,
         zero2 = exp2 == 0,
         exp2_max = exp2 == EXP_MAX,
         frac2_zero = frac2 == 0,
         inf2 = exp2_max & frac2_zero,
         nan2 = exp2_max & !frac2_zero,
         // divisor is zero, dividend is neither NaN nor zero -> infinity
         inf_out = zero2 & (!(nan1 | zero1)),
         // dividend is zero, divisor is neither NaN nor zero -> zero
         zero_out = zero1 & (!(nan2 | zero2)),
         underflow_before_correction = (temp_result < exp2),
         zero_before_correction = result_exp_before_correction == 0;

    assign
        temp_result = EXP_SHIFT + exp1,
        result_exp_before_correction = temp_result - exp2;

    // Exponent after the op2_aligned compensation, before clamping.
    wire [EXP_WIDTH: 0] result_exp_corrected = result_exp_before_correction - op2_aligned;

    wire underflow_w = underflow_before_correction || (zero_before_correction && op2_aligned);
    // The extra top bit is only an overflow indication when the subtraction
    // did not wrap below zero (that case is reported as underflow).
    wire overflow_w = !underflow_w && result_exp_corrected[EXP_WIDTH];

    always@*
    begin
        if(underflow_w) begin
            result_exp_reg = 0;
        end
        else if (overflow_w) begin
            result_exp_reg = EXP_MAX;
        end
        else
        begin
            result_exp_reg = result_exp_corrected;
        end
    end

    // ---------------- status flags ----------------
    reg [DIV_COUNTER_WIDTH - 1: 0] div_counter_reg;
    wire sign1 = op1[SIGN_BIT],
         sign2 = op2[SIGN_BIT],
         result_sign = sign1 ^ sign2;

    always@*
    begin
        overflow_reg = overflow_w;
        underflow_reg = underflow_w;
        nan_reg = nan1 | nan2 | (inf1 & inf2) | (zero1 & zero2);
        zero_reg = zero_out;
        divizion_by_zero_reg = inf_out;
    end

    // ---------------- fractional calculation ----------------
    always@(negedge rst_n, posedge clk)
    begin
        if(!rst_n)
        begin
            op1frac_stage_reg <= 0;
            op2frac_stage_reg <= 0;
            div_counter_reg <= 0;
            result_frac_reg <= 0;
            done_reg <= 0;
        end
        else if(start)
        begin
            op1frac_stage_reg <= 0;
            op2frac_stage_reg <= 0;
            div_counter_reg <= 0;
            result_frac_reg <= 0;
            done_reg <= 0;
        end
        else
        begin
            if(div_counter_reg == 0)
            begin
                // Load both significands with the hidden leading one restored.
                op1frac_stage_reg <= {1'b1, frac1, {FRACTION_WIDTH {1'b0}}};
                op2frac_stage_reg <= (op2_aligned)? {2'b01, frac2[FRACTION_MSB: 0], {(FRACTION_WIDTH - 1) {1'b0}}}:
                                                    {1'b1, frac2, {FRACTION_WIDTH {1'b0}}};
            end
            else if(div_counter_reg < FRACTION_WIDTH + 3)
            begin
                // One quotient bit per cycle, most significant bit first.
                if(op1frac_stage_reg >= op2frac_stage_reg && op1frac_stage_reg != 0)
                begin
                    op1frac_stage_reg <= op1frac_stage_reg - op2frac_stage_reg;
                    result_frac_reg <= {result_frac_reg[FRACTION_WIDTH - 1: 0], 1'b1};
                end
                else
                begin
                    result_frac_reg <= {result_frac_reg[FRACTION_WIDTH - 1: 0], 1'b0};
                end
                op2frac_stage_reg <= op2frac_stage_reg >> 1;
            end
            else
            begin
                if(nan_reg)
                begin
                    out_reg <= NAN_VALUE;
                end
                else if(inf_out)
                begin
                    out_reg <= INF_VALUE | (result_sign << SIGN_BIT);
                end
                else if(zero_out)
                begin
                    out_reg <= 0;
                end
                else
                begin
                    out_reg <= {result_sign, result_exp_reg[EXP_WIDTH - 1: 0], result_frac_reg[FRACTION_WIDTH: 1]};
                end
                done_reg <= 1;
            end
            // NOTE(review): the previous revision also wrote
            // `div_counter_reg <= 0` in the branch above, but this later
            // non-blocking assignment always overrode it. The counter thus
            // free-runs after done_reg is set and the division repeats once
            // it wraps to 0; that timing is kept unchanged here.
            div_counter_reg <= div_counter_reg + 1'b1;
        end
    end
endmodule
12 | 13 | ### Why did I do it? 14 | 15 | The operations that govern a neural network are, by nature, heavily parallel, whereas a CPU is mostly sequential. Modern machine-learning architectures take advantage of the massively parallel processing power of the GPU to accelerate the training and computing process, but [recent benchmarks](https://liu.diva-portal.org/smash/get/diva2:930724/FULLTEXT01.pdf) show that as more parallelism is required, FPGAs can outperform GPUs; indeed, unlike GPUs that require external computing interfaces, FPGAs can be an efficient and independent alternative for the task of training the neural network and computing predictions. 16 | 17 | ### How did I do this? 18 | 19 | Implementing an FPU library *and* building a neural network would have been beyond the scope of this project, so I used the FPU library from [here](https://github.com/arktur04/FPU). In retrospect, this particular choice of the FPU library was unfortunate since it wasn't very synthesis-friendly and caused conflicts when implementing on the FPGA. 
// Experiment: prints an 8-bit value next to its AND-reduction (&b), which is
// 1 only when every bit of the value is set.
module allset();

    reg [7:0] b;

    // Load `value` into b and print it together with its AND-reduction.
    task show;
        input [7:0] value;
        begin
            b = value;
            $display("%b %b", b, &b);
        end
    endtask

    initial begin
        show(8'h00);        // no bit set
        #50 show(8'hF0);    // upper nibble set
        #50 show(8'hFF);    // every bit set
        $finish;
    end

endmodule
(mbuff4 << 2'b11): 41 | (mbuff4[21]==1) ? (mbuff4 << 3'b100): 42 | (mbuff4[20]==1) ? (mbuff4 << 3'b101): 43 | (mbuff4[19]==1) ? (mbuff4 << 3'b110): 44 | (mbuff4[18]==1) ? (mbuff4 << 3'b111): 45 | (mbuff4[17]==1) ? (mbuff4 << 4'b1000): 46 | (mbuff4[16]==1) ? (mbuff4 << 4'b1001): 47 | (mbuff4[15]==1) ? (mbuff4 << 4'b1010): 48 | (mbuff4[14]==1) ? (mbuff4 << 4'b1011): 49 | (mbuff4[13]==1) ? (mbuff4 << 4'b1100): 50 | (mbuff4[12]==1) ? (mbuff4 << 4'b1101): 51 | (mbuff4[11]==1) ? (mbuff4 << 4'b1110): 52 | (mbuff4[10]==1) ? (mbuff4 << 4'b1111): 53 | (mbuff4[9]==1) ? (mbuff4 << 5'b10000): 54 | (mbuff4[8]==1) ? (mbuff4 << 5'b10001): 55 | (mbuff4[7]==1) ? (mbuff4 << 5'b10010): 56 | (mbuff4[6]==1) ? (mbuff4 << 5'b10011): 57 | (mbuff4[5]==1) ? (mbuff4 << 5'b10100): 58 | (mbuff4[4]==1) ? (mbuff4 << 5'b10101): 59 | (mbuff4[3]==1) ? (mbuff4 << 5'b10110): 60 | (mbuff4[2]==1) ? (mbuff4 << 5'b10111): 61 | (mbuff4[1]==1) ? (mbuff4 << 5'b11000): 62 | (mbuff4[0]==1) ? (mbuff4 << 1): 25'b0000000000000000000000000; 63 | 64 | 65 | 66 | assign e3= (mbuff4[24]==1) ? (mbuff6 + 8'b10000000): 67 | (mbuff4[23]==1) ? (mbuff6 + 8'b01111111): 68 | (mbuff4[22]==1) ? (mbuff6 + 8'b01111111 - 1'b1): 69 | (mbuff4[21]==1) ? (mbuff6 + 8'b01111111 - 2'b10): 70 | (mbuff4[20]==1) ? (mbuff6 + 8'b01111111 - 2'b11): 71 | (mbuff4[19]==1) ? (mbuff6 + 8'b01111111 - 3'b100): 72 | (mbuff4[18]==1) ? (mbuff6 + 8'b01111111 - 3'b101): 73 | (mbuff4[17]==1) ? (mbuff6 + 8'b01111111 - 3'b110): 74 | (mbuff4[16]==1) ? (mbuff6 + 8'b01111111 - 3'b111): 75 | (mbuff4[15]==1) ? (mbuff6 + 8'b01111111 - 4'b1000): 76 | (mbuff4[14]==1) ? (mbuff6 + 8'b01111111 - 4'b1001): 77 | (mbuff4[13]==1) ? (mbuff6 + 8'b01111111 - 4'b1010): 78 | (mbuff4[12]==1) ? (mbuff6 + 8'b01111111 - 4'b1011): 79 | (mbuff4[11]==1) ? (mbuff6 + 8'b01111111 - 4'b1100): 80 | (mbuff4[10]==1) ? (mbuff6 + 8'b01111111 - 4'b1101): 81 | (mbuff4[9]==1) ? (mbuff6 + 8'b01111111 - 4'b1110): 82 | (mbuff4[8]==1) ? (mbuff6 + 8'b01111111 - 4'b1111): 83 | (mbuff4[7]==1) ? 
(mbuff6 + 8'b01111111 - 5'b10000): 84 | (mbuff4[6]==1) ? (mbuff6 + 8'b01111111 - 5'b10001): 85 | (mbuff4[5]==1) ? (mbuff6 + 8'b01111111 - 5'b10010): 86 | (mbuff4[4]==1) ? (mbuff6 + 8'b01111111 - 5'b10011): 87 | (mbuff4[3]==1) ? (mbuff6 + 8'b01111111 - 5'b10100): 88 | (mbuff4[2]==1) ? (mbuff6 + 8'b01111111 - 5'b10101): 89 | (mbuff4[1]==1) ? (mbuff6 + 8'b01111111 - 5'b10110): 90 | (mbuff4[0]==1) ? (mbuff6 + 8'b01111111 - 5'b10111):8'b00000000; 91 | 92 | assign m3= mbuff5[24:2]; 93 | 94 | endmodule 95 | 96 | 97 | module multiplication (s3, e3, m3, s1, s2, e1, e2, m1, m2); 98 | 99 | output s3; 100 | output [7:0] e3; 101 | output [22:0] m3; 102 | input s1, s2; 103 | input [7:0] e1, e2; 104 | input [22:0] m1, m2; 105 | 106 | wire [7:0] mbuff2, mbuff3,mbuff4, count; 107 | wire [23:0] imply1, imply2; 108 | wire [47:0] mbuff1, mbuff5; 109 | 110 | assign mbuff3= e1-8'b01111111; 111 | assign mbuff4= e2-8'b01111111; 112 | 113 | assign mbuff2= mbuff3 + mbuff4; 114 | 115 | assign imply1= {1'b1,m1}; 116 | assign imply2= {1'b1,m2}; 117 | 118 | assign mbuff1= imply1 * imply2; 119 | 120 | assign count= ( mbuff1[47] == 1) ? 8'b00000001 : 8'b00000010; 121 | 122 | assign mbuff5= mbuff1 << count; 123 | 124 | assign e3 = (e1==8'b00000000) ? 8'b00000000: 125 | (e2==8'b00000000) ? 8'b00000000:mbuff2 - count +8'b00000010+8'b01111111; 126 | 127 | assign m3 = (e1==8'b00000000) ? 23'b0000000000000000000000: 128 | (e2==8'b00000000) ? 23'b0000000000000000000000:mbuff5 [47:25]; 129 | 130 | /* sign: an operand with a zero exponent field forces a +0 result, so the sign must test both e1 and e2 exactly as e3 and m3 do; otherwise it is the XOR of the operand signs */ assign s3 = (e1==8'b00000000) ? 1'b0: 131 | (e2==8'b00000000) ? 1'b0: s1 ^ s2; 132 | 133 | endmodule 134 | 135 | 136 | module compare (flag, s1, s2, e1, e2, m1, m2); 137 | 138 | output [2:0] flag; 139 | input s1, s2; 140 | input [7:0] e1, e2; 141 | input [22:0] m1, m2; 142 | 143 | wire [1:0] sign, exp, mag; 144 | 145 | assign sign= {s1,s2}; 146 | 147 | assign exp= (e1 > e2) ? 2'b10: 148 | (e2 > e1) ? 2'b01: 2'b00; 149 | 150 | assign mag= (exp == 2'b00) ? ((m1 > m2) ? 2'b10: 151 | (m2 > m1) ?
2'b01: 2'b00): 2'b11; 152 | 153 | 154 | 155 | 156 | assign flag= (sign == 2'b00) ? ((exp == 2'b10) ? 3'b100: 157 | (exp == 2'b01) ? 3'b001: 158 | (mag == 2'b10) ? 3'b100: 159 | (mag == 2'b01) ? 3'b001:3'b010): 160 | (sign == 2'b11) ? ((exp == 2'b10) ? 3'b001: 161 | (exp == 2'b01) ? 3'b100: 162 | (mag == 2'b10) ? 3'b001: 163 | (mag == 2'b01) ? 3'b100:3'b010): 164 | (sign == 2'b10) ? 3'b100 : 3'b001; 165 | 166 | 167 | endmodule 168 | 169 | module modulo32 (s2, e2, m2, s1, e1, m1); 170 | 171 | output s2; 172 | output [7:0] e2; 173 | output [22:0] m2; 174 | input s1; 175 | input [7:0] e1; 176 | input [22:0] m1; 177 | 178 | wire [7:0] mbuff3, count; 179 | wire [23:0] imp1,imp2 ,imp3 ,imp4 ,imp5 ,imp6 ,n1 ,n2; 180 | 181 | assign s2=s1; 182 | assign mbuff3= e1-8'b01111111; 183 | assign imp1= {1'b1,m1}; 184 | 185 | assign count= (mbuff3 > 8'b00010111) ? (mbuff3 - 8'b00010111): 186 | (8'b00010111 - mbuff3); 187 | 188 | assign imp2= (mbuff3 > 8'b00010111) ? (imp1 << count):(imp1 >> count); 189 | 190 | assign n1= (mbuff3[7] == 1'b1) ? 24'b000000000000000000000000 : 191 | (24'b000000000000000000011111 & imp2); 192 | 193 | assign e2= (n1[4] == 1) ? 8'b10000011: 194 | (n1[3] == 1) ? 8'b10000010: 195 | (n1[2] == 1) ? 8'b10000001: 196 | (n1[1] == 1) ? 8'b10000000: 197 | (n1[0] == 1) ? 8'b01111111: 8'b00000000; 198 | 199 | assign n2= (n1[4] == 1) ? n1 << 8'b00010011: 200 | (n1[3] == 1) ? n1 << 8'b00010100: 201 | (n1[2] == 1) ? n1 << 8'b00010101: 202 | (n1[1] == 1) ? n1 << 8'b00010110: 203 | (n1[0] == 1) ? 
n1 << 8'b00010111: 24'b000000000000000000000000; 204 | 205 | 206 | assign m2= n2[22:0]; 207 | 208 | endmodule 209 | 210 | 211 | module round (s2, e2, m2, s1, e1, m1); 212 | 213 | output s2; 214 | output [7:0] e2; 215 | output [22:0] m2; 216 | input s1; 217 | input [7:0] e1; 218 | input [22:0] m1; 219 | 220 | wire mbuff1,mbuff2; 221 | wire [7:0] mbuff3, count, X, w; 222 | wire [23:0] imp1,imp2,imp3,imp4,imp5,imp6,imp7,a; 223 | 224 | 225 | assign mbuff3= (e1 - 8'b01111111); 226 | 227 | assign imp1= {1'b1,m1}; 228 | 229 | assign count = (8'b00010111 - mbuff3); 230 | 231 | assign imp2 = imp1 >> (count-8'b00000001); 232 | 233 | assign imp3= imp2 & 24'b00000000000000000000001; 234 | 235 | assign imp4= imp2 >> 1; 236 | 237 | assign imp5= imp4 + imp3; 238 | 239 | assign X = mbuff3+ 8'b00000001; 240 | 241 | assign w= ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b1) ) ? (mbuff3 +8'b00000001 + 8'b01111111 ) : 242 | ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b0) ) ? (mbuff3 +8'b01111111) : e1; 243 | 244 | assign imp6 = ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b1) ) ? 245 | (imp5 << count- 8'b00000001): 246 | ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b0) ) ? ( imp5< 8'b10000011) ? 
( e1 - 8'b00000101 ) : 8'b00000000; 348 | 349 | assign m2 = m1; 350 | 351 | endmodule 352 | 353 | 354 | 355 | module program (outs ,oute ,outm ,xs ,xe ,xm ); 356 | 357 | output outs; 358 | output [7:0] oute; 359 | output [22:0] outm; 360 | input xs; 361 | input [7:0] xe; 362 | input [22:0] xm; 363 | 364 | wire lows,highs,ones,invs,ns,twoe9s,n1s,n2s,n2as,r1s,r1as,r1bs,r2s,l1s,l2s; 365 | wire a1s,a2s,ms,qs,ss,ps,rs,sleads,strails,e1s,nums; 366 | wire stemp19,stemp20,stemp21; 367 | wire stemp,stemp1,stemp2,stemp3,stemp4,stemp5,stemp6,stemp7,stemp8,stemp9; 368 | wire stemp10,stemp11,stemp12,stemp13,stemp14,stemp15,stemp16,stemp17,stemp18; 369 | wire [2:0] flag, flag2,flag3; 370 | wire [4:0] j; 371 | wire [7:0] lowe,highe,onee,inve,ne,twoe9e,n1e,n2e,n2ae,r1e,r1ae,r1be,r2e,l1e,l2e; 372 | wire [7:0] a1e,a2e,me,qe,se,pe,re,sleade,straile,e1e,nume,etemp19; 373 | wire [7:0] etemp,etemp1,etemp2,etemp3,etemp4,etemp5,etemp6,etemp7,etemp8,etemp9; 374 | wire [7:0] etemp10,etemp11,etemp12,etemp13,etemp14,etemp15,etemp16,etemp17,etemp18; 375 | wire [22:0] lowm,highm,onem,invm,nm,twoe9m,n1m,n2m,n2am,r1m,r1am,r1bm,r2m,l1m,l2m; 376 | wire [22:0] a1m,a2m,mm,qm,sm,pm,rm,sleadm,strailm,e1m,numm,mtemp19; 377 | wire [22:0] mtemp,mtemp1,mtemp2,mtemp3,mtemp4,mtemp5,mtemp6,mtemp7,mtemp8,mtemp9; 378 | wire [22:0] mtemp10,mtemp11,mtemp12,mtemp13,mtemp14,mtemp15,mtemp16,mtemp17,mtemp18; 379 | 380 | assign ones= 1'b0; 381 | assign onee= 8'b01111111; 382 | assign onem= 23'b00000000000000000000000; 383 | 384 | assign lows= 1'b0; 385 | assign lowe= 8'b01100110; 386 | assign lowm= 23'b00000000000000000000000; 387 | 388 | assign highs= 1'b0; 389 | assign highe= 8'b10000110; 390 | assign highm= 23'b10111000110101110111010; 391 | 392 | assign nums=1'b0; 393 | assign nume=8'b10000100; 394 | assign numm=23'b00000000000000000000000; 395 | 396 | assign invs=1'b0; 397 | assign inve=8'b10000100; 398 | assign invm=23'b01110001010101000111011; 399 | 400 | assign twoe9s=1'b0; 401 | assign twoe9e=8'b10001000; 402 
| assign twoe9m=23'b00000000000000000000000; 403 | 404 | assign l1s=1'b0; 405 | assign l1e=8'b01111001; 406 | assign l1m=23'b01100010111001000000000; 407 | 408 | assign l2s=1'b0; 409 | assign l2e=8'b01100110; 410 | assign l2m=23'b01111111011111010001110; 411 | 412 | assign a1s=1'b0; 413 | assign a1e=8'b01111110; 414 | assign a1m=23'b01010101010101011101100; 415 | 416 | 417 | multiplication mul1(stemp,etemp,mtemp,invs,xs,inve,xe,invm,xm); 418 | round rou1(ns,ne,nm,stemp,etemp,mtemp); 419 | modulo32 mod1(stemp1,etemp1,mtemp1,ns,ne,nm); 420 | 421 | 422 | addition add1(n2as,n2ae,n2am,nums,stemp1,nume,etemp1,numm,mtemp1); 423 | assign n2s= (ns==1'b1) ? n2as : stemp1; 424 | assign n2e= (ns==1'b1) ? n2ae : etemp1; 425 | assign n2m= (ns==1'b1) ? n2am : mtemp1; 426 | 427 | 428 | assign stemp2= ~n2s; 429 | addition add2(n1s,n1e,n1m,ns,stemp2,ne,n2e,nm,n2m); 430 | assign stemp21= 1'b0; 431 | compare comp1(flag2,stemp21,twoe9s,ne,twoe9e,nm,twoe9m); 432 | 433 | multiplication mul2(stemp3,etemp3,mtemp3,ns,l1s,ne,l1e,nm,l1m); 434 | assign stemp4= ~stemp3; 435 | addition add3(r1as,r1ae,r1am,stemp4,xs,etemp3,xe,mtemp3,xm); 436 | 437 | multiplication mul3(stemp5,etemp5,mtemp5,n1s,l1s,n1e,l1e,n1m,l1m); 438 | assign stemp6= ~stemp5; 439 | addition add4(stemp7,etemp7,mtemp7,stemp6,xs,etemp5,xe,mtemp5,xm); 440 | assign stemp8= ~n2s; 441 | addition add5(stemp9,etemp9,mtemp9,stemp8,stemp7,n2e,etemp7,n2m,mtemp7); 442 | multiplication mul4(r1bs,r1be,r1bm,stemp9,l1s,etemp9,l1e,mtemp9,l1m); 443 | assign r1s= (flag2 == 3'b001) ? r1as : r1bs; 444 | assign r1e= (flag2 == 3'b001) ? r1ae : r1be; 445 | assign r1m= (flag2 == 3'b001) ? 
r1am : r1bm; 446 | 447 | assign stemp10= ~ns; 448 | multiplication mul5(r2s,r2e,r2m,stemp10,l2s,ne,l2e,nm,l2m); 449 | divide d1(ms,me,mm,n1s,n1e,n1m); 450 | addition add6(rs,re,rm,r1s,r2s,r1e,r2e,r1m,r2m); 451 | multiplication mul6(stemp11,etemp11,mtemp11,rs,a2s,re,a2e,rm,a2m); 452 | addition add7(stemp12,etemp12,mtemp12,stemp11,a1s,etemp11,a1e,mtemp11,a1m); 453 | multiplication mul7(stemp13,etemp13,mtemp13,rs,rs,re,re,rm,rm); 454 | multiplication mul8(qs,qe,qm,stemp13,stemp12,etemp13,etemp12,mtemp13,mtemp12); 455 | addition add8(stemp14,etemp14,mtemp14,r2s,qs,r2e,qe,r2m,qm); 456 | addition add9(ps,pe,pm,stemp14,r1s,etemp14,r1e,mtemp14,r1m); 457 | get_j get1(j,n2s,n2e,n2m); 458 | 459 | 460 | 461 | assign sleads = 1'b0; 462 | assign strails = 1'b0; 463 | 464 | assign sleade = 8'b01111111; 465 | 466 | assign straile= (j == 5'b00000) ? 8'b00000000: 467 | (j == 5'b00001) ? 8'b01101010: 468 | (j == 5'b00010) ? 8'b01101001: 469 | (j == 5'b00011) ? 8'b01101011: 470 | (j == 5'b00100) ? 8'b01101000: 471 | (j == 5'b00101) ? 8'b01101101: 472 | (j == 5'b00110) ? 8'b01101100: 473 | (j == 5'b00111) ? 8'b01101101: 474 | (j == 5'b01000) ? 8'b01101101: 475 | (j == 5'b01001) ? 8'b01101101: 476 | (j == 5'b01010) ? 8'b01101101: 477 | (j == 5'b01011) ? 8'b01101001: 478 | (j == 5'b01100) ? 8'b01101100: 479 | (j == 5'b01101) ? 8'b01101100: 480 | (j == 5'b01110) ? 8'b01101101: 481 | (j == 5'b01111) ? 8'b01101101: 482 | (j == 5'b10000) ? 8'b01101101: 483 | (j == 5'b10001) ? 8'b01101101: 484 | (j == 5'b10010) ? 8'b01101101: 485 | (j == 5'b10011) ? 8'b01101011: 486 | (j == 5'b10100) ? 8'b01101101: 487 | (j == 5'b10101) ? 8'b01101101: 488 | (j == 5'b10110) ? 8'b01101011: 489 | (j == 5'b10111) ? 8'b01101100: 490 | (j == 5'b11000) ? 8'b01101101: 491 | (j == 5'b11001) ? 8'b01101101: 492 | (j == 5'b11010) ? 8'b01101100: 493 | (j == 5'b11011) ? 8'b01101010: 494 | (j == 5'b11100) ? 8'b01101010: 495 | (j == 5'b11101) ? 8'b01101101: 496 | (j == 5'b11110) ? 
8'b01101101: 8'b01101101; 497 | 498 | assign sleadm = (j == 5'b00000) ? 23'b00000000000000000000000: 499 | (j == 5'b00001) ? 23'b00000101100110110000000: 500 | (j == 5'b00010) ? 23'b00001011010101011000000: 501 | (j == 5'b00011) ? 23'b00010001001100000000000: 502 | (j == 5'b00100) ? 23'b00010111001010111000000: 503 | (j == 5'b00101) ? 23'b00011101010010000000000: 504 | (j == 5'b00110) ? 23'b00100011100001111000000: 505 | (j == 5'b00111) ? 23'b00101001111010011000000: 506 | (j == 5'b01000) ? 23'b00110000011011111000000: 507 | (j == 5'b01001) ? 23'b00110111000110100000000: 508 | (j == 5'b01010) ? 23'b00111101111010100000000: 509 | (j == 5'b01011) ? 23'b01000100111000001000000: 510 | (j == 5'b01100) ? 23'b01001011111111011000000: 511 | (j == 5'b01101) ? 23'b01010011010000101000000: 512 | (j == 5'b01110) ? 23'b01011010101100000000000: 513 | (j == 5'b01111) ? 23'b01100010010001111000000: 514 | (j == 5'b10000) ? 23'b01101010000010011000000: 515 | (j == 5'b10001) ? 23'b01110001111101110000000: 516 | (j == 5'b10010) ? 23'b01111010000100010000000: 517 | (j == 5'b10011) ? 23'b10000010010110001000000: 518 | (j == 5'b10100) ? 23'b10001010110011100000000: 519 | (j == 5'b10101) ? 23'b10010011011100110000000: 520 | (j == 5'b10110) ? 23'b10011100010010010000000: 521 | (j == 5'b10111) ? 23'b10100101010100000000000: 522 | (j == 5'b11000) ? 23'b10101110100010011000000: 523 | (j == 5'b11001) ? 23'b10110111111101110000000: 524 | (j == 5'b11010) ? 23'b11000001100110011000000: 525 | (j == 5'b11011) ? 23'b11001011011100100000000: 526 | (j == 5'b11100) ? 23'b11010101100000011000000: 527 | (j == 5'b11101) ? 23'b11011111110010010000000: 528 | (j == 5'b11110) ? 23'b11101010010010101000000: 529 | 23'b11110101000001110000000; 530 | 531 | assign strailm= (j == 5'b00000) ? 23'b00000000000000000000000: 532 | (j == 5'b00001) ? 23'b10100110001010110000101: 533 | (j == 5'b00010) ? 23'b10110011111001100010010: 534 | (j == 5'b00011) ? 23'b11010000000100100101110: 535 | (j == 5'b00100) ? 
23'b11100011111010101000110: 536 | (j == 5'b00101) ? 23'b11001100010110100010111: 537 | (j == 5'b00110) ? 23'b00110111001110101011001: 538 | (j == 5'b00111) ? 23'b01111101010001111111100: 539 | (j == 5'b01000) ? 23'b10000010100011000110111: 540 | (j == 5'b01001) ? 23'b11001101110011101010101: 541 | (j == 5'b01010) ? 23'b10010011000001001000111: 542 | (j == 5'b01011) ? 23'b10000001100001100010010: 543 | (j == 5'b01100) ? 23'b01101010100110110001011: 544 | (j == 5'b01101) ? 23'b10101011010011101010100: 545 | (j == 5'b01110) ? 23'b11110111010100100001011: 546 | (j == 5'b01111) ? 23'b10101100000011101001011: 547 | (j == 5'b10000) ? 23'b10011001100111111100111: 548 | (j == 5'b10001) ? 23'b01111010001110110001100: 549 | (j == 5'b10010) ? 23'b00011100111110101100000: 550 | (j == 5'b10011) ? 23'b10011001010011001100111: 551 | (j == 5'b10100) ? 23'b01010000100010101010100: 552 | (j == 5'b10101) ? 23'b11101100001100110111001: 553 | (j == 5'b10110) ? 23'b10000010101000111111000: 554 | (j == 5'b10111) ? 23'b11011001000111110001001: 555 | (j == 5'b11000) ? 23'b11100110010101101011010: 556 | (j == 5'b11001) ? 23'b10111100101111101101100: 557 | (j == 5'b11010) ? 23'b11101110110000101010101: 558 | (j == 5'b11011) ? 23'b10111001110111110010000: 559 | (j == 5'b11100) ? 23'b10111001111101110100101: 560 | (j == 5'b11101) ? 23'b11001100110111101110011: 561 | (j == 5'b11110) ? 
23'b11101000101010010010010: 562 | 23'b10010110110110111001001; 563 | 564 | 565 | 566 | addition add10(ss,se,sm,sleads,strails,sleade,straile,sleadm,strailm); 567 | multiplication mul9(stemp15,etemp15,mtemp15,ss,ps,se,pe,sm,pm); 568 | addition add11(stemp16,etemp16,mtemp16,stemp15,strails,etemp15,straile,mtemp15,strailm); 569 | addition add12(e1s,e1e,e1m,sleads,stemp16,sleade,etemp16,sleadm,mtemp16); 570 | powertwo p1(stemp17,etemp17,mtemp17,ms,me,mm); 571 | multiplication mul10(stemp18,etemp18,mtemp18,stemp17,e1s,etemp17,e1e,mtemp17,e1m); 572 | /* 573 | assign stemp20= 1'b0; 574 | 575 | compare comp2(flag,stemp20,highs,xe,highe,xm,highm); 576 | compare comp3(flag3,stemp20,lows,xe,lowe,xm,lowm); 577 | addition add13(stemp19,etemp19,mtemp19,ones,xs,onee,xe,onem,xm); 578 | 579 | assign outs=(xe == 8'b11111111) ? 580 | ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 1'b0:1'b0): 581 | "x"):(flag == 3'b001) ? stemp19 : 582 | (flag3 == 3'b100) ? 1'b0 : stemp18; 583 | 584 | assign oute=(xe == 8'b11111111) ? 585 | ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 8'b11111111:8'b00000000): 586 | "xxxxxxxx"):(flag == 3'b001) ? etemp19 : 587 | (flag3 == 3'b100) ? 8'b11111111 : etemp18; 588 | 589 | assign outm=(xe == 8'b11111111) ? 590 | ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 23'b00000000000000000000000:23'b00000000000000000000000): 591 | "xxxxxxxxxxxxxxxxxxxxxxx"):(flag == 3'b001) ? mtemp19 : 592 | (flag3 == 3'b100) ? 
23'b00000000000000000000000:mtemp18; 593 | 594 | */ 595 | assign outs= stemp18; 596 | assign oute= etemp18; 597 | assign outm= mtemp18; 598 | 599 | 600 | endmodule 601 | 602 | 603 | module test_exp(); 604 | 605 | reg [31:0] x; 606 | wire [31:0] y; 607 | 608 | wire xs; 609 | wire [7:0] xe; 610 | wire [22:0] xm; 611 | 612 | wire ys; 613 | wire [7:0] ye; 614 | wire [22:0] ym; 615 | 616 | assign xs = x[31]; 617 | assign xe = x[30:23]; 618 | assign xm = x[22:0]; 619 | 620 | assign ys = y[31]; 621 | assign ye = y[30:23]; 622 | assign ym = y[22:0]; 623 | 624 | program p(y[31],y[30:23],y[22:0],x[31],x[30:23],x[22:0]); 625 | 626 | initial begin 627 | $dumpfile("exp.vcd"); 628 | $dumpvars(0, test_exp); 629 | x = 32'h38a00000; 630 | #50000; 631 | $display("%H", y); 632 | end 633 | 634 | endmodule 635 | -------------------------------------------------------------------------------- /filters/net.filter: -------------------------------------------------------------------------------- 1 | 000 L1_S 2 | 001 L1 3 | 010 L2_S 4 | 011 L2 5 | 100 DONE 6 | -------------------------------------------------------------------------------- /filters/sigmoid.filter: -------------------------------------------------------------------------------- 1 | 0 ADD_START 2 | 1 ADD 3 | 2 DIV_START 4 | 3 DIV 5 | 4 ADD_2_START 6 | 5 ADD_2 7 | 6 MUL_START 8 | 7 MUL 9 | 8 DONE 10 | -------------------------------------------------------------------------------- /guard.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ "$#" -ne 0 ]; then 3 | file=$1; 4 | if [ -f "$file" ]; then 5 | filename=${file##*/}; 6 | guard=$(echo __$(echo ${filename^^} | tr . _)__ ); 7 | echo -e "\`ifndef $guard\n\`define $guard\n$(cat $file)\n\`endif" > $file; 8 | else 9 | echo "NOT A FILE" 10 | fi 11 | else 12 | echo "Apply Guard to All Files in Directory? 
[y/N]" 13 | read yes 14 | if [ "$yes" = "y" ]; then 15 | for file in *.v; do 16 | filename=${file##*/}; 17 | guard=$(echo __$(echo ${filename^^} | tr . _)__ ); 18 | echo -e "\`ifndef $guard\n\`define $guard\n$(cat $file)\n\`endif" > $file; 19 | done 20 | fi 21 | fi 22 | -------------------------------------------------------------------------------- /input_conditioner.v: -------------------------------------------------------------------------------- 1 | `ifndef __INPUT_CONDITIONER_V__ 2 | `define __INPUT_CONDITIONER_V__ 3 | //------------------------------------------------------------------------ 4 | // Input Conditioner 5 | // 1) Synchronizes input to clock domain 6 | // 2) Debounces input 7 | // 3) Creates pulses at edge transitions 8 | // Taken from Lab 2 9 | //------------------------------------------------------------------------ 10 | 11 | module input_conditioner 12 | ( 13 | input clk, // Clock domain to synchronize input to 14 | input noisysignal, // (Potentially) noisy input signal 15 | output reg conditioned, // Conditioned output signal 16 | output reg positiveedge, // 1 clk pulse at rising edge of conditioned 17 | output reg negativeedge // 1 clk pulse at falling edge of conditioned 18 | ); 19 | 20 | parameter counterwidth = 3; // Counter size, in bits, >= log2(waittime) 21 | parameter waittime = 3; // Debounce delay, in clock cycles 22 | 23 | reg[counterwidth-1:0] counter = 0; 24 | reg synchronizer0 = 0; 25 | reg synchronizer1 = 0; 26 | 27 | always @(posedge clk ) begin 28 | 29 | // posedge, negedge defaults to 0 30 | positiveedge <= 0; 31 | negativeedge <= 0; 32 | 33 | if(conditioned == synchronizer1) 34 | counter <= 0; 35 | else begin 36 | if( counter == waittime) begin 37 | counter <= 0; 38 | conditioned <= synchronizer1; 39 | positiveedge <= synchronizer1; 40 | negativeedge <= !synchronizer1; 41 | end 42 | else begin 43 | counter <= counter+1; 44 | end 45 | end 46 | synchronizer0 <= noisysignal; 47 | synchronizer1 <= synchronizer0; 48 | end 49 | 
endmodule 50 | `endif 51 | -------------------------------------------------------------------------------- /lerp.v: -------------------------------------------------------------------------------- 1 | `ifndef __LERP_V__ 2 | `define __LERP_V__ 3 | `include "div_float.v" 4 | `include "add_float.v" 5 | `include "mul_float.v" 6 | 7 | module lerp 8 | #(parameter S=32) 9 | ( 10 | input clk, 11 | input start, 12 | input rst_n, 13 | input [31:0] x1, 14 | input [31:0] x2, 15 | input [31:0] y1, 16 | input [31:0] y2, 17 | input [31:0] x, 18 | output [31:0] y, 19 | output done 20 | ); 21 | 22 | // x --> (x-x1),(x2-x1),(y2-y1) --> (x-x1)/(x2-x1) -> (x-x1)/(x2-x1)*(y2-y1) 23 | // --> + y1 24 | 25 | reg [31:0] xsx1; 26 | reg [31:0] x2sx1; 27 | reg [31:0] y2sy1; 28 | reg [31:0] dxx; // (x-x1)/(x2-x1) 29 | 30 | reg [2:0] sub_done; 31 | reg [1:0] div_done; 32 | 33 | // Phase 1 34 | add_float #(.FLOAT_WIDTH(S)) s1(rst_n, clk, start, 1'b1, x, x1, xsx1, nan, overflow, underflow, zero, sub_done[0]); 35 | add_float #(.FLOAT_WIDTH(S)) s2(rst_n, clk, start, 1'b1, x2, x1, x2sx1, nan, overflow, underflow, zero, sub_done[1]); 36 | add_float #(.FLOAT_WIDTH(S)) s3(rst_n, clk, start, 1'b1, y2, y1, y2sy1, nan, overflow, underflow, zero, sub_done[2]); 37 | 38 | div_float #(.FLOAT_WIDTH(S)) d1(rst_n, clk, start, x1, `DELTA, dxx, zero, nan, overflow, underflow, zero_reg, div_done[0]); 39 | 40 | // Phase 2 41 | div_float #(.FLOAT_WIDTH(S)) d2(rst_n, clk, &sub_done, xsx1, x2sx1, zero, nan, overflow, underflow, zero_reg, div_done[1]); 42 | 43 | mul_float #(.FLOAT_WIDTH(S)) m1(rst_n, clk, div_done[1], x 44 | `endif 45 | -------------------------------------------------------------------------------- /matmul.t.v: -------------------------------------------------------------------------------- 1 | `ifndef __MATMUL_T_V__ 2 | `define __MATMUL_T_V__ 3 | 4 | `include "matmul.v" 5 | 6 | `define HEIGHT 4 7 | `define WIDTH 1 8 | `define COMMON 2 9 | 10 | 11 | module test_matmul(); 12 | 13 | task print_mat; 14 | 
parameter height = 1; 15 | parameter width = 1; 16 | input [height*width*32-1:0] mat; 17 | integer i,j; 18 | begin 19 | $display("-----------------"); 20 | for(i=0; i add 30 | wire add_start = (stage == 1); 31 | wire add_rst_n = (stage == 2); 32 | 33 | always @(negedge clk) begin 34 | if(rst_n == 0 | start) begin 35 | stage <= 0; 36 | end 37 | end 38 | 39 | always @(posedge clk) begin 40 | case(stage) 41 | 0: begin 42 | // accum left, right 43 | if(done_l && done_r) begin 44 | stage <= stage+1; 45 | end 46 | end 47 | 1: begin 48 | // add-start 49 | stage <= stage+1; 50 | end 51 | 2: begin 52 | if(add_done) begin 53 | stage <= stage+1; 54 | end 55 | end 56 | 3: begin 57 | 58 | end 59 | default: begin 60 | 61 | end 62 | endcase 63 | 64 | end 65 | 66 | wire nan, overflow, underflow, zero; // don't really care for now 67 | wire done_l, done_r; 68 | wire add_done; 69 | 70 | if(C == 1) begin 71 | assign done = 1'b1; 72 | // direct assignment 73 | assign O = I; 74 | end else begin 75 | wire [S-1:0] o_l; 76 | wire [S-1:0] o_r; 77 | 78 | accumulate #(.S(S), .C(C-X)) ac_l(rst_n, clk, start, I[S*C-1:S*X], o_l, done_l); // accumulate left side 79 | accumulate #(.S(S), .C(X)) ac_r(rst_n, clk, start, I[S*X-1:0], o_r, done_r); // accumulate right side 80 | add_float #(.FLOAT_WIDTH(S)) add(add_rst_n, clk, add_start, 1'b0, o_l, o_r, O, nan, overflow, underflow, zero, add_done); 81 | assign done = (stage == 3); 82 | end 83 | endmodule 84 | 85 | module matmul // size = 32 bits, width, height, common 86 | #(parameter S=32, W=2, H=2, C=2) 87 | ( 88 | // H x W 89 | // 2x5 * 5x3 = 2x3 90 | // H*C * C*W = H*W 91 | // row major 92 | input rst_n, 93 | input clk, 94 | input start, 95 | 96 | input [S*H*C-1:0] a, 97 | input [S*C*W-1:0] b, 98 | output [S*H*W-1:0] o, 99 | output done 100 | ); 101 | 102 | reg [2:0] stage = 0; 103 | 104 | wire mul_start = (stage == 0); 105 | wire accum_start = (stage == 2); 106 | always @(negedge clk) begin 107 | if(start) 108 | stage = 0; 109 | end 110 | always 
@(posedge clk) begin 111 | if(start) 112 | stage = 0; 113 | else begin 114 | case(stage) 115 | 0: begin 116 | stage = stage + 1; 117 | end 118 | 1: begin 119 | if(&mult_all_done) 120 | stage = stage + 1; 121 | end 122 | 2: begin 123 | stage = stage + 1; 124 | end 125 | 3: begin 126 | if(&add_done) 127 | stage = stage + 1; 128 | end 129 | 4: begin 130 | 131 | end 132 | 133 | endcase 134 | end 135 | end 136 | 137 | wire nan; 138 | wire overflow; 139 | wire underflow; 140 | wire zero; 141 | 142 | wire [H*W-1:0] add_done; 143 | wire [H*W-1:0] mult_all_done; 144 | 145 | genvar i,j,k; 146 | integer l; 147 | 148 | generate 149 | 150 | for(i=0; ioutputs stored to C-length array o_tmp 159 | // debugging 160 | //always @(o_tmp) begin 161 | // if(i == 0 && j == 1) begin 162 | // $write("(%d, %d) * (%d,%d)", i, k, k, j); 163 | // $write("%H * ", `ELEM(a,i,k,H,C,S)); 164 | // $write("%H = ", `ELEM(b,k,j,C,W,S)); 165 | // $write("%H", `ELEM(o_tmp,0,k,1,C,S)); 166 | // end 167 | //end 168 | end 169 | 170 | assign mult_all_done[i*W+j] = &mult_done; 171 | 172 | // accumulate 173 | accumulate #(.S(32), .C(C)) acc(rst_n, clk, accum_start, o_tmp, `ELEM(o,j,i,W,H,S), add_done[i*W+j]); 174 | end 175 | end 176 | 177 | endgenerate 178 | 179 | assign done = &add_done; // only done when all elements are completed 180 | 181 | endmodule 182 | `endif 183 | -------------------------------------------------------------------------------- /mul_float.t.v: -------------------------------------------------------------------------------- 1 | `ifndef __MUL_FLOAT_T_V__ 2 | `define __MUL_FLOAT_T_V__ 3 | `include "mul_float.v" 4 | 5 | module test_mul(); 6 | 7 | reg rst_n; 8 | reg clk=0; 9 | reg start; 10 | 11 | reg [31:0] op1; 12 | reg [31:0] op2; 13 | 14 | wire [31:0] out; 15 | wire nan; 16 | wire overflow; 17 | wire underflow; 18 | wire zero; 19 | wire done; 20 | 21 | mul_float #(.FLOAT_WIDTH(32)) dut( 22 | //inputs 23 | .rst_n(rst_n), 24 | .clk(clk), 25 | .start(start), 26 | .op1(op1), 27 | .op2(op2), 
28 | //outputs 29 | .out_reg(out), 30 | .nan_reg(nan), 31 | .overflow_reg(overflow), 32 | .underflow_reg(underflow), 33 | .zero_reg(zero), 34 | .done_reg(done) 35 | ); 36 | 37 | always begin 38 | #10 39 | clk = !clk; 40 | end 41 | 42 | always @(posedge done) begin 43 | $display("a b o"); 44 | $display("%H %H %H", op1, op2, out); 45 | end 46 | 47 | initial begin 48 | 49 | $dumpfile("mul_float.vcd"); 50 | $dumpvars(0, test_mul); 51 | 52 | rst_n = 1'b0; 53 | @(negedge clk); 54 | op1 = 32'h40a00000; 55 | op2 = 32'h40a00000; 56 | start = 1; 57 | @(negedge clk); 58 | start = 0; 59 | rst_n = 1'b1; 60 | #500; 61 | $finish; 62 | end 63 | 64 | 65 | endmodule 66 | `endif 67 | -------------------------------------------------------------------------------- /mul_float.v: -------------------------------------------------------------------------------- 1 | `ifndef __MUL_FLOAT_V__ 2 | `define __MUL_FLOAT_V__ 3 | module mul_float 4 | #(parameter 5 | FLOAT_WIDTH = 64 6 | ) 7 | ( 8 | input wire rst_n, clk, start, 9 | input wire [FLOAT_WIDTH - 1: 0] op1, op2, 10 | output reg [FLOAT_WIDTH - 1: 0] out_reg, 11 | output reg nan_reg, 12 | output reg overflow_reg, 13 | output reg underflow_reg, 14 | output reg zero_reg, 15 | output reg done_reg 16 | ); 17 | localparam EXP_WIDTH = (FLOAT_WIDTH == 64) ? 11: 8; 18 | localparam FRACTION_WIDTH = (FLOAT_WIDTH == 64) ? 52: 23; 19 | localparam FULL_FRACTION_WIDTH = FRACTION_WIDTH + 3; 20 | localparam SIGN_BIT = FLOAT_WIDTH - 1; 21 | localparam EXP_MSB = SIGN_BIT - 1; 22 | localparam EXP_LSB = EXP_MSB - EXP_WIDTH + 1; 23 | localparam EXP_SHIFT = (2 ** (EXP_WIDTH - 1)) - 1; 24 | localparam EXP_MAX = (2 ** (EXP_WIDTH)) - 1; 25 | localparam FRACTION_MSB = EXP_LSB - 1; 26 | localparam NAN_VALUE = (FLOAT_WIDTH == 64) ? 64'h7FF8_0000_0000_0000: 32'hFFC0_0000; 27 | localparam INF_VALUE = (FLOAT_WIDTH == 64) ? 
64'h7FF0_0000_0000_0000: 32'h7F80_0000; 28 | localparam PRODUCT_WIDTH = (FRACTION_WIDTH + 1) * 2; 29 | localparam STAGE_REG_WIDTH = 2; 30 | localparam MAX_STAGE_REG = 2; 31 | 32 | wire [EXP_WIDTH - 1: 0] exp1 = op1[EXP_MSB: EXP_LSB], 33 | exp2 = op2[EXP_MSB: EXP_LSB]; 34 | 35 | wire [FRACTION_WIDTH: 0] frac1 = {1'b1, op1[FRACTION_MSB: 0]}, 36 | frac2 = {1'b1, op2[FRACTION_MSB: 0]}; 37 | 38 | reg [STAGE_REG_WIDTH - 1: 0] stage_reg; 39 | wire [STAGE_REG_WIDTH - 1: 0] next_stage = stage_reg + 1; 40 | 41 | always@(posedge clk or negedge rst_n) 42 | begin: stage_inc 43 | if(!rst_n) 44 | stage_reg <= 0; 45 | else 46 | begin 47 | if(start) 48 | stage_reg <= 0; 49 | else if(stage_reg < MAX_STAGE_REG) 50 | stage_reg <= next_stage; 51 | end 52 | end 53 | 54 | reg [EXP_WIDTH + 1: 0] full_exp_sum_reg, full_exp_sum_after_correction_reg; //full exp sum has two additional bits 55 | reg [PRODUCT_WIDTH - 1: 0] full_frac_reg; 56 | reg [FRACTION_WIDTH + 1: 0] frac_res_before_rounding_reg; 57 | 58 | always@(posedge clk or negedge rst_n) 59 | begin 60 | if(!rst_n) 61 | begin 62 | 63 | end 64 | else 65 | begin 66 | if(stage_reg == 0) 67 | begin 68 | full_exp_sum_reg <= exp1 + exp2 - EXP_SHIFT; 69 | full_frac_reg <= frac1 * frac2; 70 | end 71 | else if(stage_reg == 1) 72 | begin 73 | //exp correction must be undertaken 74 | full_exp_sum_after_correction_reg <= full_exp_sum_reg + full_frac_reg[PRODUCT_WIDTH - 1]; 75 | frac_res_before_rounding_reg <= full_frac_reg[PRODUCT_WIDTH - 1]? full_frac_reg[PRODUCT_WIDTH - 1: PRODUCT_WIDTH - FRACTION_WIDTH - 2] : full_frac_reg[PRODUCT_WIDTH - 2: PRODUCT_WIDTH - FRACTION_WIDTH - 3]; 76 | end 77 | // it is not the end... 
78 | end 79 | end 80 | 81 | wire [FRACTION_WIDTH + 1: 0] frac_res_after_rounding = frac_res_before_rounding_reg + 1; 82 | wire [FRACTION_WIDTH - 1: 0] frac_res = frac_res_after_rounding[FRACTION_WIDTH: 1]; 83 | wire sign1 = op1[SIGN_BIT], 84 | sign2 = op2[SIGN_BIT]; 85 | wire sign_res = sign1 ^ sign2; 86 | wire [EXP_WIDTH - 1: 0] exp_res = full_exp_sum_after_correction_reg[EXP_WIDTH - 1: 0]; 87 | wire 88 | is_zero1 = (op1 & INF_VALUE) == 0, 89 | is_zero2 = (op2 & INF_VALUE) == 0, 90 | is_nan1 = &exp1 && (op1[FRACTION_WIDTH - 1: 0] != 0), 91 | is_nan2 = &exp2 && (op2[FRACTION_WIDTH - 1: 0] != 0), 92 | is_inf1 = &exp1 && (op1[FRACTION_WIDTH - 1: 0] == 0), 93 | is_inf2 = &exp2 && (op2[FRACTION_WIDTH - 1: 0] == 0), 94 | is_inf_result = (full_exp_sum_after_correction_reg[EXP_WIDTH + 1: EXP_WIDTH] == 2'b01) || ((full_exp_sum_after_correction_reg[EXP_WIDTH + 1: EXP_WIDTH] == 2'b00) && (&full_exp_sum_after_correction_reg[EXP_WIDTH - 1: 0])), 95 | is_nan_result = is_nan1 || is_nan2 || (is_zero1 && is_inf2) || (is_inf1 && is_zero2), 96 | is_overflow_result = is_inf_result && !(is_inf1 || is_inf2) && !is_nan_result, 97 | is_underflow_result = (full_exp_sum_after_correction_reg[EXP_WIDTH + 1] || (exp_res == 0)) && !(is_zero1 || is_zero2) && !is_overflow_result && !is_nan_result, 98 | is_zero_result = (is_zero1 || is_zero2 || (exp_res == 0) || is_underflow_result) && !is_overflow_result && !is_nan_result; 99 | 100 | always@(posedge clk or negedge rst_n) 101 | begin: result_out 102 | if(!rst_n) 103 | begin 104 | out_reg <= 0; 105 | end 106 | else 107 | if(stage_reg == 2) 108 | begin 109 | if(is_nan_result) 110 | begin 111 | out_reg <= NAN_VALUE; 112 | end 113 | else if(is_zero_result) 114 | begin 115 | out_reg <= {sign_res, {(FRACTION_WIDTH + EXP_WIDTH){1'b0}}}; 116 | end 117 | else if(is_inf_result) 118 | begin 119 | out_reg <= {sign_res, {EXP_WIDTH{1'b1}}, {FRACTION_WIDTH{1'b0}}}; 120 | end 121 | else 122 | begin 123 | out_reg <= {sign_res, exp_res, frac_res}; 124 | end 125 | 
end
end

// done_reg registers the comparison of stage_reg against MAX_STAGE_REG.
always@(posedge clk or negedge rst_n)
begin: done_out
    if(!rst_n)
    begin
        done_reg <= 0;
    end
    else
    begin
        done_reg <= stage_reg == MAX_STAGE_REG; //done
    end
end

// Status flags are captured when stage_reg == 2.
always@(posedge clk or negedge rst_n)
begin: aux_outs
    if(!rst_n)
    begin
        nan_reg <= 0;
        overflow_reg <= 0;
        underflow_reg <= 0;
        zero_reg <= 0;
    end
    else
    begin
        if(stage_reg == 2)
        begin
            nan_reg <= is_nan_result;
            overflow_reg <= is_overflow_result;
            underflow_reg <= is_underflow_result;
            zero_reg <= is_zero_result;
        end
    end
end

endmodule
`endif
--------------------------------------------------------------------------------
/net.t.v:
--------------------------------------------------------------------------------
`ifndef __NET_T_V__
`define __NET_T_V__
`include "net.v"

`define INPUT 2
`define HIDDEN 4
`define OUTPUT 1

// Testbench for `net`.
//
// Instantiates a network with `INPUT inputs, `HIDDEN hidden units and
// `OUTPUT outputs, feeds it the four {0,1} x {0,1} input pairs and prints
// the inputs and the output word every time `done` rises. Waveforms are
// written to net.vcd.
//
// NOTE(review): there is no timeout; if `done` never rises the simulation
// blocks in test_xor and never reaches $finish.
module test_net();

// All testbench signals are declared before the task that drives them, and
// the driven ones start from known values instead of X.
reg clk = 0;
reg rst_n = 1'b0;
reg start = 1'b0;

reg [32*`INPUT*1-1:0] x = 0;
wire [32*`OUTPUT*1-1:0] y;

wire [31:0] zero = 32'h00000000;
wire [31:0] one = 32'h3f800000;

wire done;

// Free-running clock, toggles every 10 time units.
always begin
    #10
    clk = !clk;
end

// Runs one evaluation with inputs a and b.
//   a, b : 32-bit input words, packed into x as {a,b}
// Signals change on falling clock edges; the task returns at the next
// rising edge of `done`.
task test_xor;
    input [31:0] a;
    input [31:0] b;
    begin
        rst_n = 1'b0;
        #100;
        @(negedge clk);
        x = {a,b};
        start = 1'b1;
        @(negedge clk);
        rst_n = 1'b1;
        start = 1'b0;
        @(posedge done);
    end
endtask

net #(.I(`INPUT), .O(`OUTPUT), .H(`HIDDEN), .D(1)) n(clk, rst_n, start, x, y, done);

// x is packed as {a,b}, so a is the upper word; print the operands in the
// same order in which they were passed to test_xor.
always @(posedge done) begin
    $display("%H ^ %H = %H", x[63:32], x[31:0], y);
end

initial begin
    $dumpfile("net.vcd");
    $dumpvars(0, test_net);
    #500;
    test_xor(zero,zero); // --> 0
    #500;
    test_xor(zero,one); // --> 1
    #500;
    test_xor(one,zero); // --> 1
    #500;
    test_xor(one,one); // --> 0
    #500;
    $finish;
end

endmodule

`endif
--------------------------------------------------------------------------------
/net.v:
--------------------------------------------------------------------------------
`ifndef __NET_V__
`define __NET_V__

`include "sigmoid.v"
`include "matmul.v"
`include "add_float.v"

/// vectorized float addition

module add_float_v
#(parameter S=32, N=1)
(
    input rst_n,
    input clk,
    input start,
    input [S*N-1:0] a,
    input [S*N-1:0] b,
    output [S*N-1:0] o,
    output done
);

wire nan, overflow, underflow, zero; // don't care

wire [N-1:0] done_elem;

genvar i;
generate
// NOTE(review): this dump jumps from net.v line 28 to line 126 on the next
// line; the rest of add_float_v and the head of the following module
// (presumably `layer`) are not present here and have not been reconstructed.
for(i=0;iY : %H", y);
            stage = stage + 1;
        end
    end
    6: begin
        //stay at 6
    end
    default: begin

    end
    endcase
end

matmul #(.S(S), .W(1), .H(O), .C(I)) m(mul_rst_n, clk, mul_start, W, x, o_1, mul_done);
add_float_v #(.S(S), .N(O)) add(add_rst_n, clk, add_start, o_1, b, o_2, add_done); // o_1 -(+b)-> o_2
sigmoid #(.S(S), .N(O)) sig(clk, sig_rst_n, sig_start, o_2, y, sig_done); //o_2 -(sig())-> y

endmodule


// Two chained `layer` instances: x -> l_1 -> o_1 -> l_2 -> y.
//
// Parameters
//   I, H, O : number of 32-bit words in the input, hidden and output vectors
//   D       : not referenced anywhere inside this module
// Ports
//   clk   : the stage counter advances on the rising edge
//   rst_n : not referenced anywhere inside this module; `start` is what
//           returns the sequencer to stage 0
//   start : while high at a rising clock edge, stage is forced to 0
//   x, y  : packed input and output vectors
//   done  : high while stage == 4
//
// Stage sequence once start is low:
//   0 : start_1 high and l_1_rst_n low for one cycle
//   1 : wait for done_1
//   2 : start_2 high and l_2_rst_n low for one cycle
//   3 : wait for done_2
//   4 : hold (done high) until start is asserted again
//
// The memories l_1._W, l_1._b, l_2._W and l_2._b are loaded from the
// data/*.txt files with $readmemh at time 0.
module net
#(
    parameter I = 784,
    parameter O = 10,
    parameter H = 75,
    parameter D = 1 // depth of array
)
(
    input clk,
    input rst_n,
    input start,
    input [I*32-1:0] x,
    output [O*32-1:0] y,
    output done
);

localparam S = 32;

reg [2:0] stage = 0;

wire [H*32-1:0] o_1; // intermediate unit for hidden layer
wire done_1, done_2;

// Declared before the instantiations that use them, so the port
// connections bind to these nets rather than to implicit ones.
wire start_1 = (stage == 0);
wire start_2 = (stage == 2);

wire l_1_rst_n = !start_1;
wire l_2_rst_n = !start_2;

layer #(.S(S), .I(I), .O(H)) l_1(clk, l_1_rst_n, start_1, x, o_1, done_1);
layer #(.S(S), .I(H), .O(O)) l_2(clk, l_2_rst_n, start_2, o_1, y, done_2);

initial begin
    $readmemh("data/w1.txt", l_1._W);
    $readmemh("data/b1.txt", l_1._b);
    $readmemh("data/w2.txt", l_2._W);
    $readmemh("data/b2.txt", l_2._b);
end

assign done = (stage == 4);

// Nonblocking assignments: logic clocked by the same edge (the layer
// instances see start_1/start_2, which are derived from stage) always
// observes the value stage had before the edge, independent of the
// simulator's process ordering.
always @(posedge clk) begin
    if(start) begin
        stage <= 0;
    end else begin
        case(stage)
        0: begin
            stage <= stage + 1;
        end
        1: begin
            if(done_1)
                stage <= stage + 1;
        end
        2: begin
            stage <= stage + 1;
        end
        3: begin
            if(done_2)
                stage <= stage + 1;
        end
        4: begin
            // finished: hold until start
        end
        default: begin
            // stage values 5..7 are never assigned by the sequence above;
            // hold, exactly as the case without a default did
        end
        endcase
    end
end

endmodule

//genvar i;
//generate
//
// NOTE(review): this dump jumps from net.v line 218 to line 24 of another
// file on the next line; judging by the $dumpvars call below, what follows is
// the tail of a testbench named test_sigmoid. The missing text has not been
// reconstructed.
//for(i=0; i %H", x, y);
end

initial begin
    $dumpfile("sigmoid.vcd");
    $dumpvars(0, test_sigmoid);

    rst_n = 1'b0;
    @(negedge clk);
    x = {32'hc0733333,32'h40a00000};
    start = 1'b1;
    @(negedge clk);
    start = 1'b0;
    rst_n = 1'b1;
    @(posedge done);
    #100;

    rst_n = 1'b0;
    @(negedge clk);
    x = {32'h0, 32'h40a00000};
    start = 1'b1;
    @(negedge clk);
    start = 1'b0;
    rst_n = 1'b1;
    @(posedge done);
    #100;

    $finish;
end

endmodule
`endif
--------------------------------------------------------------------------------
/sigmoid.v:
--------------------------------------------------------------------------------
`ifndef __SIGMOID_V__
`define __SIGMOID_V__

`include "div_float.v"
`include "mul_float.v"
`include "add_float.v"
`define GET(v,e,s) v[(e+1)*s-1:e*s]

module sigmoid
#(parameter S=32, parameter N=2)
(
    input clk,
    input rst_n,
    input start,
    input [S*N-1:0] x,
    output [S*N-1:0] y,
    output done
);

// implements fast
sigmoid, x / (1 + abs(x)) 21 | 22 | // x -> abs(x) -> 1.0 + % -> x/% -> 1 + % -> 0.5 * % 23 | reg [3:0] stage = 0; // up to 8 24 | wire [31:0] one = 32'h3f800000; 25 | wire [31:0] half = 32'h3f000000; 26 | 27 | wire [S*N-1:0] opax; // one plus abs x 28 | wire [S*N-1:0] xdo; // x div. opax 29 | wire [S*N-1:0] hpx; // half plus xdo 30 | 31 | wire [3:0] stage_done; 32 | 33 | wire add_start = (stage == 0); 34 | wire div_start = (stage == 2); 35 | wire add_start_2 = (stage == 4); 36 | wire mul_start = (stage == 6); 37 | 38 | wire add_rst_n = (stage != 0); 39 | wire div_rst_n = (stage != 2); //negedge right before stage == 2 40 | wire add_rst_n_2 = (stage != 4); 41 | wire mul_rst_n = (stage != 6); 42 | 43 | wire nan, zero, overflow, underflow, divzero; 44 | 45 | assign done = (stage == 8); 46 | 47 | always @(negedge clk) begin 48 | if(start) 49 | stage = 0; 50 | end 51 | 52 | always @(posedge clk) begin 53 | if(start) 54 | stage = 0; 55 | else begin 56 | case(stage) 57 | 0: begin 58 | if(!start) 59 | stage = stage + 1; 60 | end 61 | 1: begin 62 | if(stage_done[0]) begin 63 | stage = stage + 1; 64 | end 65 | end 66 | 2: begin 67 | stage = stage + 1; 68 | end 69 | 3: begin 70 | if(stage_done[1]) begin 71 | stage = stage+1; 72 | end 73 | end 74 | 4: begin 75 | stage = stage + 1; 76 | end 77 | 5: begin 78 | if(stage_done[2]) begin 79 | stage = stage + 1; 80 | end 81 | end 82 | 6: begin 83 | stage = stage + 1; 84 | end 85 | 7: begin 86 | if(stage_done[3]) begin 87 | stage = stage + 1; 88 | end 89 | end 90 | 8: begin 91 | 92 | end 93 | default: begin 94 | 95 | end 96 | endcase 97 | end 98 | end 99 | 100 | // TODO : change start/done signals 101 | generate 102 | genvar i; 103 | for(i=0; i