├── .gitignore
├── Makefile
├── README.md
├── add_float.t.v
├── add_float.v
├── approx_sigmoid.py
├── comp_float.t.v
├── comp_float.v
├── conversion
    ├── Makefile
    ├── f2h.cpp
    ├── gen_data.cpp
    └── h2f.cpp
├── data
    ├── b1.txt
    ├── b2.txt
    ├── w1.txt
    └── w2.txt
├── div_float.v
├── documentation.md
├── experiments
    ├── allset.v
    └── exp.v
├── filters
    ├── net.filter
    └── sigmoid.filter
├── guard.bash
├── input_conditioner.v
├── lerp.v
├── matmul.t.v
├── matmul.v
├── mul_float.t.v
├── mul_float.v
├── net.t.v
├── net.v
├── net_wrapper.t.v
├── net_wrapper.v
├── polyfit.py
├── sigmoid.t.v
├── sigmoid.v
└── x.tcl


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/c++,vim,git,linux,python,vivado
  3 | 
  4 | ### C++ ###
  5 | # Prerequisites
  6 | *.d
  7 | 
  8 | # Compiled Object files
  9 | *.slo
 10 | *.lo
 11 | *.o
 12 | *.obj
 13 | 
 14 | # Precompiled Headers
 15 | *.gch
 16 | *.pch
 17 | 
 18 | # Compiled Dynamic libraries
 19 | *.so
 20 | *.dylib
 21 | *.dll
 22 | 
 23 | # Fortran module files
 24 | *.mod
 25 | *.smod
 26 | 
 27 | # Compiled Static libraries
 28 | *.lai
 29 | *.la
 30 | *.a
 31 | *.lib
 32 | 
 33 | # Executables
 34 | *.exe
 35 | *.out
 36 | *.app
 37 | 
 38 | 
 39 | ### Vim ###
 40 | # swap
 41 | [._]*.s[a-w][a-z]
 42 | [._]s[a-w][a-z]
 43 | # session
 44 | Session.vim
 45 | # temporary
 46 | .netrwhist
 47 | *~
 48 | # auto-generated tag files
 49 | tags
 50 | 
 51 | 
 52 | ### Git ###
 53 | *.orig
 54 | 
 55 | 
 56 | ### Linux ###
 57 | 
 58 | # temporary files which can be created if a process still has a handle open of a deleted file
 59 | .fuse_hidden*
 60 | 
 61 | # KDE directory preferences
 62 | .directory
 63 | 
 64 | # Linux trash folder which might appear on any partition or disk
 65 | .Trash-*
 66 | 
 67 | # .nfs files are created when an open file is removed but is still being accessed
 68 | .nfs*
 69 | 
 70 | 
 71 | ### Python ###
 72 | # Byte-compiled / optimized / DLL files
 73 | __pycache__/
 74 | *.py[cod]
 75 | *$py.class
 76 | 
 77 | # C extensions
 78 | 
 79 | # Distribution / packaging
 80 | .Python
 81 | env/
 82 | build/
 83 | develop-eggs/
 84 | dist/
 85 | downloads/
 86 | eggs/
 87 | .eggs/
 88 | lib/
 89 | lib64/
 90 | parts/
 91 | sdist/
 92 | var/
 93 | *.egg-info/
 94 | .installed.cfg
 95 | *.egg
 96 | 
 97 | # PyInstaller
 98 | #  Usually these files are written by a python script from a template
 99 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
100 | *.manifest
101 | *.spec
102 | 
103 | # Installer logs
104 | pip-log.txt
105 | pip-delete-this-directory.txt
106 | 
107 | # Unit test / coverage reports
108 | htmlcov/
109 | .tox/
110 | .coverage
111 | .coverage.*
112 | .cache
113 | nosetests.xml
114 | coverage.xml
115 | *,cover
116 | .hypothesis/
117 | 
118 | # Translations
119 | *.mo
120 | *.pot
121 | 
122 | # Django stuff:
123 | *.log
124 | local_settings.py
125 | 
126 | # Flask stuff:
127 | instance/
128 | .webassets-cache
129 | 
130 | # Scrapy stuff:
131 | .scrapy
132 | 
133 | # Sphinx documentation
134 | docs/_build/
135 | 
136 | # PyBuilder
137 | target/
138 | 
139 | # Jupyter Notebook
140 | .ipynb_checkpoints
141 | 
142 | # pyenv
143 | .python-version
144 | 
145 | # celery beat schedule file
146 | celerybeat-schedule
147 | 
148 | # dotenv
149 | .env
150 | 
151 | # virtualenv
152 | .venv/
153 | venv/
154 | ENV/
155 | 
156 | # Spyder project settings
157 | .spyderproject
158 | 
159 | # Rope project settings
160 | .ropeproject
161 | 
162 | 
163 | ### Vivado ###
164 | #########################################################################################################
165 | ##	This is an example .gitignore file for Vivado, please treat it as an example as 
166 | ##	it might not be complete. In addition, XAPP 1165 should be followed.
167 | #########################################################################################################
168 | #########
169 | #Exclude all
170 | #########
171 | !*/
172 | !.gitignore
173 | ###########################################################################
174 | ##	VIVADO
175 | ###########################################################################
176 | #########
177 | #Source files:
178 | #########
179 | #Do NOT ignore VHDL, Verilog, block diagrams or EDIF files.
180 | !*.vhd
181 | !*.v
182 | !*.bd
183 | !*.edif
184 | #########
185 | #IP files
186 | #########
187 | #.xci: synthesis and implemented not possible - you need to return back to the previous version to generate output products
188 | #.xci + .dcp: implementation possible but not re-synthesis
189 | #*.xci(www.spiritconsortium.org)
190 | !*.xci
191 | #*.dcp(checkpoint files)
192 | !*.dcp
193 | !*.vds
194 | !*.pb
195 | #All bd comments and layout coordinates are stored within .ui
196 | !*.ui
197 | !*.ooc
198 | #########
199 | #System Generator
200 | #########
201 | !*.mdl
202 | !*.slx
203 | !*.bxml
204 | #########
205 | #Simulation logic analyzer
206 | #########
207 | !*.wcfg
208 | !*.coe
209 | #########
210 | #MIG
211 | #########
212 | !*.prj
213 | !*.mem
214 | #########
215 | #Project files
216 | #########
217 | #XPR  +  *.XML ? XPR (Files are merged into a single XPR file for 2014.1 version)
218 | #Do NOT ignore *.xpr files
219 | !*.xpr
220 | #Include *.xml files for 2013.4 or earlier version
221 | !*.xml
222 | #########
223 | #Constraint files
224 | #########
225 | #Do NOT ignore *.xdc files
226 | !*.xdc
227 | #########
228 | #TCL - files
229 | #########
230 | !*.tcl
231 | #########
232 | #Journal - files
233 | #########
234 | !*.jou
235 | #########
236 | #Reports
237 | #########
238 | !*.rpt
239 | !*.txt
240 | !*.vdi
241 | #########
242 | #C-files
243 | #########
244 | !*.c
245 | !*.h
246 | !*.elf
247 | !*.bmm
248 | !*.xmp
249 | 
250 | *.vcd
251 | *.gtkw
252 | 
253 | data/
254 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all : mul_float.o add_float.o comp_float.o matmul.o sigmoid.o net.o net_wrapper.o
 2 | # 'all', 'run' and 'clean' name actions rather than files, so declare them phony.
 3 | .PHONY: all run clean
 4 | # run: build every testbench, then execute each simulation in turn.
 5 | run: all
 6 | 	@echo "===== -- mul_float -- ====="
 7 | 	./mul_float.o
 8 | 	@echo "===== -- add_float -- ====="
 9 | 	./add_float.o
10 | 	@echo "===== -- comp_float -- ====="
11 | 	./comp_float.o
12 | 	@echo "===== -- matmul -- ====="
13 | 	./matmul.o
14 | 	@echo "===== -- sigmoid -- ====="
15 | 	./sigmoid.o
16 | 	@echo "===== -- net -- ====="
17 | 	./net.o
18 | 	@echo "===== -- net_wrapper -- ====="
19 | 	./net_wrapper.o
20 | # Each *.o below is the simulation file written by 'iverilog -o'; 'run' executes it directly.
21 | mul_float.o : mul_float.v mul_float.t.v
22 | 	iverilog mul_float.t.v -o mul_float.o
23 | 
24 | add_float.o : add_float.v add_float.t.v
25 | 	iverilog add_float.t.v -o add_float.o
26 | 
27 | comp_float.o : comp_float.v comp_float.t.v
28 | 	iverilog comp_float.t.v -o comp_float.o
29 | 
30 | matmul.o : matmul.v matmul.t.v mul_float.v add_float.v
31 | 	iverilog matmul.t.v -o matmul.o
32 | 
33 | sigmoid.o : sigmoid.v sigmoid.t.v add_float.v mul_float.v div_float.v
34 | 	iverilog sigmoid.t.v -o sigmoid.o
35 | # net and net_wrapper list the lower-level simulation files as prerequisites, so those are rebuilt first.
36 | net.o : net.v net.t.v sigmoid.o matmul.o
37 | 	time iverilog net.t.v -o net.o
38 | 
39 | net_wrapper.o : net_wrapper.v net_wrapper.t.v net.o
40 | 	iverilog net_wrapper.t.v -o net_wrapper.o
41 | clean:
42 | 	rm -f *.o
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CompArch Final Project
 2 | 
 3 | The goal of the final project is for you to explore a topic of interest within Computer Architecture, driven by your personal learning goals. This could build on and extend something we discussed in class, or dive into some other area of Computer Architecture (broadly defined).
 4 | 
 5 | You may work in teams of any size, as long as they are appropriately scaled for your proposed project. Groups with > 4 members will face heavy skepticism about meeting this requirement. 
 6 | 
 7 | In terms of scale, this is not a months-long capstone but rather more like an extended Lab. You will have about 2 weeks to complete it, and it will comprise 15% of your final grade. Be ambitious but realistic.
 8 | 
 9 | ## Timeline
10 | 
11 | -	Nov 17 (in class) – project ideation and team formation fair
 12 | -	Nov 28 (in class) – draft project proposal due, consultations with teams
13 | -	Nov 29 – revised project proposal and work plan due
14 | -	Dec 5 – mid-point check in (self-defined in work plan, highly recommended)
15 | -	Dec 15 – final project due
16 | 
17 | ## Proposal (10%)
18 | Your project proposal should be about 1-2 pages, and must include:
19 | 
20 | -	Project title
21 | -	Team members
22 | -	Brief description of project (1-3 paragraphs)
23 | -	2-3 references you plan to use
24 | -	Minimum, planned, and stretch deliverables
25 | -	Work plan (by Tuesday)
26 | 
27 | We will discuss your proposal in class on November 28 (first class after break). These meetings will be quick and to-the-point, so you must come prepared with a printed out copy of your proposal. You should have done some background research by this point and have a good idea of your planned project trajectory.
28 | 
29 | Based on the feedback from this meeting, you will revise your proposal and submit the final version including a work plan the following day.
30 | 
31 | ## Documentation (55%)
32 | The documentation counts for 55% of your grade whether you succeed at your goal or not.  Did you shoot for the moon and land among the harsh vacuum of space?  You still learned something from the process, and as long as you document it well, you will get full credit.
33 | 
34 | Documentation should be posted in the form of a project website (PDF or MarkDown in a repo can also be acceptable depending on the project) and must answer the following questions:
35 | 
36 | ### What did you do?
37 | Your project abstract: one catchy sentence followed by a paragraph or two.  The intended audience should include people that aren't necessarily versed in Computer Architecture, but are technically competent. 
38 | ### Why did you do it?
39 | A paragraph or so about why the project you chose is worthwhile and interesting.
40 | ### How did you do it?
41 | This portion can assume an audience that has taken Computer Architecture, but don't let the story you’re telling get bogged down by buzzwords.  A sure sign of a bad engineer is ORA (over reliance on acronyms). 
42 | 
43 | ### How can someone else build on it?
44 | Include everything necessary to pick up where you left off.  This should include (as appropriate):
45 | 
46 | -	code
47 | -	schematics
48 | -	scripts and build instructions
49 | - proper attribution for resources used and anything you did not write yourself
50 | -	list of difficulties and ‘gotchas’ while doing this project
51 | - reflection on the project as a whole as well as your work plan
52 | -	possible TODOs to extend the depth of the project
53 | 
54 | This should all be posted somewhere accessible, e.g. your project webpage or repository. Please do not literally include these question prompts and then answer them (you're better than that) - instead, use them to check that you've covered all the bases as you tell the story in the way that best makes sense for your project.
55 | 
56 | ## Choosing and Achieving your Goal (30%)
57 | There is a lot of flexibility available in what your actual final project can be. As a first pass, it needs to satisfy the following criteria:
58 | 
59 | 1. Build upon what we have learned in class this semester or other "Computer Architecture" topics
60 | 1. Have well-defined criteria for when it is finished and successful
61 | 1. Be achievable within the time allotted
62 | 
63 | ## Possible broad directions:
64 | 
65 | - Extending something you started in Computer Architecture
66 | - Teaching somebody something cool about Computer Architecture
67 | - Something useful to someone that uses Computer Architecture
68 | - Something that needs the skills learned in Computer Architecture
69 | - Something that you can present at Expo that will make people want to take Computer Architecture
70 | 
71 | Append one of the following phrases to a cool project idea to make it more CompArch-y:
72 | 
73 | - ... with an FPGA
74 | - ... in assembly
75 | - ... on a GPU
76 | - ... inside a nested series of black boxes
77 | - ... hardware accelerated
78 | 
79 | As you put your project plans together, remember that a major portion of the project is communicating it to others.
80 | 
81 | ## Demo (5%)
82 | We’ll present your project work during the time blocked out for "final exam" period – December 15 from 12 – 3PM.  This is mainly an opportunity to show off and celebrate your great work (small percentage of overall grade), and the details are up to you.
83 | 
84 | The "default" option is a poster version of your project documentation (along with a running live demo if appropriate), so that folks can walk around in a studio session and see what you did. Maybe you feel that a presentation is more appropriate for your project work. Perhaps a tutorial session with everyone participating makes the most sense. It could be that only a puppet show truly captures the essence of your project. Think about final demo format as you put together your proposal, but you don't need to make a final decision just yet.
85 | 
86 | Good luck, and have fun!
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/add_float.t.v:
--------------------------------------------------------------------------------
 1 | //`ifndef __ADD_FLOAT_T_V__
 2 | //`define __ADD_FLOAT_T_V__
 3 | //`include "add_float.v"
 4 | //
 5 | //module test_add();
 6 | //
 7 | //
 8 | //reg rst_n, clk=0, start;
 9 | //reg [31:0] a, b;
10 | //wire [31:0] o;
11 | //wire nan, overflow, underflow, zero, done;
12 | //
13 | //add_float #(.FLOAT_WIDTH(32)) add(rst_n, clk, start, 1'b0, a, b, o, nan, overflow, underflow, zero, done);
14 | //
15 | //always begin
16 | //	#10
17 | //	clk = !clk;
18 | //end
19 | //
20 | //always @(posedge done) begin
21 | //	$display("%H %H %H %d", a, b, o, $time);
22 | //end
23 | //
24 | //
25 | //initial begin
26 | //	$display("a b o");
27 | //	check(32'h40a00000, 32'h40000000);
28 | //	#500
29 | //	check(32'h400g0000, 32'h3f800000);
30 | //	#500
31 | //	$finish;
32 | //end
33 | //
34 | //task check;
35 | //	input [31:0] lhs, rhs;
36 | //	begin
37 | //		rst_n = 1'b0;
38 | //		@(negedge clk);
39 | //		a = lhs; 
40 | //		b = rhs;
41 | //		start = 1;
42 | //		@(negedge clk);
43 | //		start = 0;
44 | //		rst_n = 1'b1;
45 | //		@(posedge done);
46 | //	end
47 | //endtask
48 | //
49 | //endmodule
50 | //`endif
51 | //
52 | `ifndef __ADD_FLOAT_T_V__
53 | `define __ADD_FLOAT_T_V__
54 | `include "add_float.v"
55 | 
 56 | module test_add();
 57 | // Testbench for a 32-bit add_float instance: runs one addition and prints the operands and the result.
 58 | reg rst_n, clk=0, start;
 59 | reg [31:0] a, b;
 60 | wire [31:0] o;
 61 | wire nan, overflow, underflow, zero, done;
 62 | // Ports are bound by position; the constant 1'b0 drives the 4th port. NOTE(review): presumably an add/subtract select - confirm in add_float.v.
 63 | add_float #(.FLOAT_WIDTH(32)) add(rst_n, clk, start, 1'b0, a, b, o, nan, overflow, underflow, zero, done);
 64 | // Free-running clock: toggles every 10 time units (period 20).
 65 | always begin
 66 | 	#10
 67 | 	clk = !clk;
 68 | end
 69 | // Print a, b and o in hex on every rising edge of done.
 70 | always @(posedge done) begin
 71 | 	$display("%H %H %H", a, b, o);
 72 | end
 73 | // Stimulus: a single operation with both operands set to 32'h3f800000 (1.0 in IEEE-754 single precision).
 74 | initial begin
 75 | 	$display("a b o");
 76 | 	// rst_n is held low while start is high; both change on the same falling clock edge below.
 77 | 	rst_n = 1'b0;
 78 | 	@(negedge clk);
 79 | 	a = 32'h3f800000;
 80 | 	b = 32'h3f800000;
 81 | 	start = 1;
 82 | 	@(negedge clk);  // start stays high for exactly one clock period
 83 | 	start = 0;
 84 | 	rst_n = 1'b1;
 85 | 	#500;  // fixed run time: the result is printed only if done rises within this window
 86 | 	$finish;
 87 | end
 88 | 
 89 | endmodule
90 | `endif
91 | 


--------------------------------------------------------------------------------
/add_float.v:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yycho0108/CompArchNeuralNet/d4799406d41134e9f1a94142fe454d4ffa5c4a5d/add_float.v


--------------------------------------------------------------------------------
/approx_sigmoid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import numpy as np
 4 | import sys
 5 | from matplotlib import pyplot as plt
 6 | 
 7 | def sigmoid(x):
 8 |     return 1.0 /(1.0 + np.exp(-x))
 9 | def fast_sigmoid(x):
10 |     return 0.5*(1 +  x / (1.0 + np.abs(x)))
11 | 
12 | # x -> abs(x) -> 1.0 + % -> x/% -> 1 + % -> 0.5 * %
13 | def main():
14 |     if sys.argv[1].lower() == 'plot':
15 |         xs = np.linspace(-10,10,200)
16 |         ys = sigmoid(xs)
17 |         p = np.polyfit(xs, ys, 5)
18 |         f = np.poly1d(p)
19 | 
20 |         plt.plot(xs,ys)
21 |         plt.plot(xs,fast_sigmoid(xs))
22 |         plt.plot(xs,f(xs))
23 | 
24 | 
25 |         plt.title('Approximated Sigmoid')
26 |         plt.legend(['sigmoid', 'fast sigmoid', 'polyfit'], loc='lower right')
27 | 
28 |         plt.show()
29 |     else:
30 |         x = float(sys.argv[1])
31 |         print 'sigmoid :', sigmoid(x)
32 |         print 'approx :', fast_sigmoid(x)
33 | 
34 | if __name__ == "__main__":
35 |     main()
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/comp_float.t.v:
--------------------------------------------------------------------------------
 1 | `ifndef __COMP_FLOAT_T_V__
 2 | `define __COMP_FLOAT_T_V__
 3 | `include "comp_float.v"
 4 | 
 5 | module test_comp_float();
 6 | 
 7 | // Operands driven by the stimulus; flag is the comparator's 3-bit result.
 8 | reg [31:0] a, b;
 9 | wire [2:0] flag;
10 | 
11 | comp_float cmp(flag, a,b);
12 | 
13 | // Apply one operand pair, wait 500 time units, then print both operands
14 | // in hex followed by the flag in binary.
15 | task show;
16 | 	input [31:0] lhs, rhs;
17 | 	begin
18 | 		a = lhs;
19 | 		b = rhs;
20 | 		#500;
21 | 		$display("%H %H | %b", a, b, flag);
22 | 	end
23 | endtask
24 | 
25 | initial begin
26 | 	show(32'h3f800000, 32'h40000000); // 1.0 vs 2.0
27 | 	show(32'h40000000, 32'h3f800000); // 2.0 vs 1.0
28 | 	show(32'h40000000, 32'h40000000); // 2.0 vs 2.0
29 | end
30 | endmodule
31 | `endif
32 | 


--------------------------------------------------------------------------------
/comp_float.v:
--------------------------------------------------------------------------------
 1 | `ifndef __COMP_FLOAT_V__
 2 | `define __COMP_FLOAT_V__
 3 | module comp_float(
 4 | 	output [2:0] flag, // one-hot result: 3'b100 a > b, 3'b010 a == b, 3'b001 a < b
 5 | 	input [31:0] a,    // IEEE-754 single-precision bit pattern
 6 | 	input [31:0] b     // IEEE-754 single-precision bit pattern
 7 | );
 8 | // Combinational comparator. NaN and infinity get no special handling; they are ordered by their raw fields.
 9 | wire s1, s2;
10 | wire [7:0] e1, e2;
11 | wire [22:0] m1, m2;
12 | 
13 | assign {s1,e1,m1} = a;
14 | assign {s2,e2,m2} = b;
15 | 
16 | // exp / mag encoding: 2'b10 = a's field is larger, 2'b01 = b's is larger, 2'b00 = tie.
17 | wire [1:0] sign, exp, mag;
18 | assign sign= {s1,s2};
19 | assign exp= (e1 > e2) ? 2'b10:
20 |             (e2 > e1) ? 2'b01: 2'b00;
21 | // mag only matters on an exponent tie; 2'b11 marks "not applicable".
22 | assign mag= (exp == 2'b00) ? ((m1 > m2) ? 2'b10:
23 |                                 (m2 > m1) ? 2'b01: 2'b00): 2'b11;
24 | // Ordering of |a| versus |b|, plus the same result with greater/less swapped.
25 | wire [2:0] abs_flag = (exp == 2'b10) ? 3'b100:
26 |                       (exp == 2'b01) ? 3'b001:
27 |                       (mag == 2'b10) ? 3'b100:
28 |                       (mag == 2'b01) ? 3'b001: 3'b010;
29 | wire [2:0] abs_flag_rev = {abs_flag[0], abs_flag[1], abs_flag[2]};
30 | wire both_zero = (a[30:0] == 31'b0) && (b[30:0] == 31'b0); // +0 and -0 compare equal
31 | // Same sign: magnitude order (reversed when both are negative).
32 | // Mixed signs: the non-negative operand is the greater one.
33 | assign flag= both_zero ? 3'b010 :
34 |              (sign == 2'b00) ? abs_flag : (sign == 2'b11) ? abs_flag_rev :
35 |              (sign == 2'b10) ? 3'b001 : 3'b100;
36 | endmodule
36 | `endif
37 | 


--------------------------------------------------------------------------------
/conversion/Makefile:
--------------------------------------------------------------------------------
 1 | all: h2f.o f2h.o gen_data.o
 2 | .PHONY: all
 3 | h2f.o: h2f.cpp
 4 | 	g++ h2f.cpp -o h2f.o
 5 | # NOTE: despite the .o suffix these targets are linked executables (g++ is run without -c).
 6 | f2h.o: f2h.cpp
 7 | 	g++ f2h.cpp -o f2h.o
 8 | # gen_data is built as C++11 and linked against Armadillo.
 9 | gen_data.o : gen_data.cpp
10 | 	g++ -std=c++11 gen_data.cpp -L/usr/lib -larmadillo -o gen_data.o
11 | 


--------------------------------------------------------------------------------
/conversion/f2h.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | #include <cstdio>
 3 | #include <iostream>
 4 | 
 5 | void usage(){ // write the expected command line to stderr
 6 | 	std::fputs("USAGE : f2h <a> <b>\n", stderr);
 7 | }
 8 | 
 9 | int main(int argc, char* argv[]){
10 | 	if(argc != 3){
11 | 		usage();
12 | 		return -1;
13 | 	}
14 | 	// Parse both operands as single-precision values (atof yields 0 for unparsable text).
15 | 	float a = std::atof(argv[1]);
16 | 	float b = std::atof(argv[2]);
17 | 	// The product is computed in float so its bit pattern is the single-precision result.
18 | 	float c = a*b;
19 | 	// Print each value's raw bit pattern in hex; the cast assumes unsigned int and float have the same size.
20 | 	printf("a : %x\n",*(unsigned int*)(&a)); 
21 | 	printf("b : %x\n",*(unsigned int*)(&b)); 
22 | 	printf("a*b : %x\n",*(unsigned int*)(&c)); 
23 | }
24 | 


--------------------------------------------------------------------------------
/conversion/gen_data.cpp:
--------------------------------------------------------------------------------
 1 | #include <random>
 2 | #include <string>
 3 | #include <sstream>
 4 | #include <iostream>
 5 | 
 6 | #define ARMA_DONT_USE_CXX11
 7 | #include <armadillo>
 8 | 
 9 | std::string f2h(const float& f){ // lowercase hex text of the float's raw bit pattern (no leading zeros)
10 | 	char buf[9] = {}; // 8 hex digits + terminating NUL
11 | 	snprintf(buf, sizeof(buf), "%x", *reinterpret_cast<const unsigned int*>(&f));
12 | 	return std::string(buf);
13 | }
14 | 
15 | float h2f(const std::string& h){ // interpret hex text as the raw bit pattern of a float
16 | 	unsigned int x = 0; // stays 0 (i.e. 0.0f) when h is empty or contains no hex digits
17 | 	std::stringstream ss;
18 | 	ss << std::hex << h;
19 | 	ss >> x;
20 | 	return reinterpret_cast<float&>(x); // bit reinterpretation, not a numeric conversion
21 | }
22 | void print_hex(char name, const arma::mat& m){
23 | 	// Label on its own line, then one output line per matrix row of space-separated hex words.
24 | 	std::cout << name << std::endl;
25 | 	for(unsigned int row=0; row != m.n_rows; ++row){
26 | 		// verification std::cout << '[' << h2f(f2h(m(row,col))) << ']';
27 | 		for(unsigned int col=0; col != m.n_cols; ++col)
28 | 			std::cout << f2h(m(row,col)) << ' ';
29 | 		std::cout << std::endl;
30 | 	}
31 | }
32 | 
33 | void print_verilog_hex(char name, const arma::mat& m){
34 | 	// Emit `name = {32'h.., 32'h..};` listing every element of m in iteration order.
35 | 	std::cout << name << " = {";
36 | 	const char* sep = ""; // nothing before the first element, so the list has no trailing comma
37 | 	for(auto& e : m){ std::cout << sep << "32'h" << f2h(e); sep = ", "; }
38 | 	std::cout << "};" << std::endl;
39 | }
40 | int main(){
41 | 	int h = 4;
42 | 	int w = 6;
43 | 	int c = 3;
44 | 	// Generates random operands a (h x c) and b (c x w) plus their product, for use as matmul test data.
45 | 	//std::cout << f2h(2.0) << std::endl;
46 | 	//std::cout << h2f(f2h(2.0)) << std::endl;
47 | 
48 | 	arma::mat a = arma::randn<arma::mat>(h,c);
49 | 	arma::mat b = arma::randn<arma::mat>(c,w);
50 | 	arma::mat o = a*b;
51 | 	// Human-readable decimal dump of the operands and the product.
52 | 	std::cout << a << std::endl;
53 | 	std::cout << b << std::endl;
54 | 	std::cout << o << std::endl;
55 | 
56 | 	// Operands as Verilog literals, then all three matrices as hex words (the product is reused, not recomputed).
57 | 	print_verilog_hex('a', a);
58 | 	print_verilog_hex('b', b);
59 | 	print_hex('a', a);
60 | 	print_hex('b', b);
61 | 	print_hex('o', o);
62 | }
63 | 


--------------------------------------------------------------------------------
/conversion/h2f.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | #include <cstdio>
 3 | #include <iostream>
 4 | #include <sstream>
 5 | 
 6 | void usage(){ // write the expected command line to stderr
 7 | 	std::fputs("USAGE : h2f <hex>\n", stderr);
 8 | }
 9 | 
10 | int main(int argc, char* argv[]){
11 | 	if(argc != 2){
12 | 		usage();
13 | 		return -1;
14 | 	}
15 | 	// Read the argument as a hexadecimal bit pattern; reject text that does not parse instead of printing garbage.
16 | 	unsigned int x = 0;
17 | 	std::stringstream ss;
18 | 	ss << std::hex << argv[1];
19 | 	if(!(ss >> x)){ usage(); return -1; }
20 | 	float f = reinterpret_cast<float&>(x); // bit reinterpretation, not a numeric conversion
21 | 	printf("%f\n", f);
22 | }
23 | 


--------------------------------------------------------------------------------
/data/b1.txt:
--------------------------------------------------------------------------------
1 | c0484701
2 | c005bde9
3 | 3f3bd890
4 | bf8ad9d4
5 | 
6 | 


--------------------------------------------------------------------------------
/data/b2.txt:
--------------------------------------------------------------------------------
1 | c0643a93
2 | 
3 | 


--------------------------------------------------------------------------------
/data/w1.txt:
--------------------------------------------------------------------------------
 1 | 40b73992
 2 | c0d0bf63
 3 | c0a61919
 4 | 40773ea3
 5 | c01ff261
 6 | c0453fb9
 7 | 3e93541e
 8 | be53b6ca
 9 | 
10 | 


--------------------------------------------------------------------------------
/data/w2.txt:
--------------------------------------------------------------------------------
1 | 4124d498
2 | 410ba3e7
3 | c092b275
4 | bf84c951
5 | 
6 | 


--------------------------------------------------------------------------------
/div_float.v:
--------------------------------------------------------------------------------
  1 | `ifndef __DIV_FLOAT_V__
  2 | `define __DIV_FLOAT_V__
  3 | module div_float
  4 |   #(parameter
  5 |    FLOAT_WIDTH = 64  // 64 selects the 11-bit exponent / 52-bit fraction layout; any other value the 8 / 23 layout
  6 |   )
  7 |   (
  8 |   input wire rst_n, clk, start,  // rst_n: asynchronous active-low reset; start: synchronous clear of the divider state
  9 |   input wire [FLOAT_WIDTH - 1: 0] op1, op2,  // dividend and divisor; read combinationally throughout the operation
 10 |   output reg [FLOAT_WIDTH - 1: 0] out_reg,  // quotient, written in the final step (not cleared by reset or start)
 11 |   output reg divizion_by_zero_reg,  // NOTE(review): never assigned anywhere in this module
 12 |   output reg nan_reg,  // combinational: either operand NaN, inf/inf, or zero/zero
 13 |   output reg overflow_reg,  // combinational: carry bit of the result exponent
 14 |   output reg underflow_reg,  // combinational: biased exponent difference would drop below zero
 15 |   output reg zero_reg,  // NOTE(review): never assigned anywhere in this module
 16 |   output reg done_reg  // set in the cycle out_reg is written; cleared by reset or start
 17 |   );
 18 |   localparam EXP_WIDTH = (FLOAT_WIDTH == 64) ? 11: 8; 
 19 |   localparam FRACTION_WIDTH = (FLOAT_WIDTH == 64) ? 52: 23;
 20 |   localparam FULL_FRACTION_WIDTH = 2 * FRACTION_WIDTH + 1;
 21 |   localparam SIGN_BIT =  FLOAT_WIDTH - 1;
 22 |   localparam EXP_MSB = SIGN_BIT - 1;
 23 |   localparam EXP_LSB = EXP_MSB - EXP_WIDTH + 1;
 24 |   localparam EXP_SHIFT = (2 ** (EXP_WIDTH - 1)) - 1;
 25 |   localparam EXP_MAX = (2 ** (EXP_WIDTH)) - 1;
 26 |   localparam FRACTION_MSB = EXP_LSB - 1;
 27 |   localparam DIV_COUNTER_WIDTH = (FLOAT_WIDTH == 64) ? 7: 6;
 28 |   localparam NAN_VALUE = (FLOAT_WIDTH == 64) ? 64'hFFF8_0000_0000_0000: 32'hFFC0_0000;
 29 |   localparam INF_VALUE = (FLOAT_WIDTH == 64) ? 64'h7FF0_0000_0000_0000: 32'h7F80_0000;
 30 |   // Sequential shift-and-subtract divider: one quotient bit per clock, then the packed result is written and done_reg is raised.
 31 |   wire [FRACTION_MSB: 0] frac1 = op1[FRACTION_MSB: 0],
 32 |    frac2 = op2[FRACTION_MSB: 0];
 33 |   
 34 |   reg [FRACTION_WIDTH: 0] result_frac_reg;
 35 |   // Working dividend / divisor, widened so the divisor can be shifted right once per step.
 36 |   reg [FULL_FRACTION_WIDTH - 1: 0] op1frac_stage_reg, op2frac_stage_reg;
 37 |   wire op2_aligned = frac1 < frac2;  // dividend fraction is smaller: divisor is pre-shifted and the exponent reduced by one
 38 |   // exponent calculation
 39 |   wire [EXP_WIDTH - 1: 0] exp1 = op1[EXP_MSB: EXP_LSB],
 40 |     exp2 = op2[EXP_MSB: EXP_LSB];
 41 | 
 42 |   wire [EXP_WIDTH: 0] result_exp_before_correction, temp_result;
 43 | 
 44 |   reg [EXP_WIDTH: 0] result_exp_reg;
 45 |   // Operand classification. zero1/zero2 test only the exponent field, so denormals are treated as zero.
 46 |   wire zero1 = exp1 == 0,
 47 |   exp1_max = exp1 == EXP_MAX,
 48 |   frac1_zero = frac1 == 0,
 49 |   inf1 = exp1_max & frac1_zero,
 50 |   nan1 = exp1_max & !frac1_zero,
 51 |   zero2 = exp2 == 0,
 52 |   exp2_max = exp2 == EXP_MAX,
 53 |   frac2_zero = frac2 == 0,
 54 |   inf2 = exp2_max & frac2_zero,
 55 |   nan2 = exp2_max & !frac2_zero,
 56 |   inf_out = zero2 & (!(nan1 | zero1)),
 57 |   zero_out = zero1 & (!(nan2 | zero2)),  // zero dividend with a non-NaN, non-zero divisor yields zero
 58 |   underflow_before_correction = (temp_result < exp2),
 59 |   zero_before_correction = result_exp_before_correction == 0;
 60 |   
 61 |   assign 
 62 |     temp_result = EXP_SHIFT + exp1,
 63 |     result_exp_before_correction = temp_result - exp2;
 64 |   // NOTE(review): result_exp_reg reads overflow_reg while overflow_reg is derived from result_exp_reg (combinational feedback).
 65 |   always@*
 66 |   begin
 67 |     if(underflow_reg) begin
 68 |       result_exp_reg = 0;
 69 |     end
 70 |     else if (overflow_reg) begin
 71 |       result_exp_reg = EXP_MAX;
 72 |     end
 73 |     else 
 74 |     begin
 75 |       result_exp_reg = result_exp_before_correction - op2_aligned;
 76 |     end
 77 |   end
 78 |   //--------------------
 79 |   // fractional calculation
 80 |   reg [DIV_COUNTER_WIDTH   - 1: 0] div_counter_reg;
 81 |   wire sign1 = op1[SIGN_BIT],
 82 |   sign2 = op2[SIGN_BIT],
 83 |   result_sign = sign1 ^ sign2;
 84 |   
 85 |   always@*
 86 |   begin
 87 |     overflow_reg = result_exp_reg[EXP_WIDTH];
 88 |     underflow_reg = underflow_before_correction || (zero_before_correction && op2_aligned); 
 89 |     nan_reg = nan1 | nan2 | (inf1 & inf2) | (zero1 & zero2);
 90 |   end
 91 |   
 92 |   always@(negedge rst_n, posedge clk)
 93 |   begin
 94 |     if(!rst_n)
 95 |     begin
 96 |       op1frac_stage_reg <= 0;
 97 |       op2frac_stage_reg <= 0;
 98 |       div_counter_reg <= 0;
 99 |       result_frac_reg <= 0;
100 |       done_reg <= 0;
101 |     end
102 |     else
103 |     begin
104 |       if(start)  // same clearing as reset, but synchronous
105 |       begin
106 |         op1frac_stage_reg <= 0;
107 |         op2frac_stage_reg <= 0;
108 |         div_counter_reg <= 0;
109 |         result_frac_reg <= 0;
110 |         done_reg <= 0;
111 |       end
112 |       else 
113 |       begin
114 |         if(div_counter_reg == 0)  // step 0: load both fractions with the implicit leading 1 restored
115 |         begin
116 |           op1frac_stage_reg <= {1'b1, frac1, {FRACTION_WIDTH {1'b0}}};
117 |           op2frac_stage_reg <= (op2_aligned)? {2'b01, frac2[FRACTION_MSB: 0], {FRACTION_WIDTH - 1 {1'b0}}}: {1'b1, frac2, {FRACTION_WIDTH {1'b0}}};
118 |         end
119 |         else
120 |         begin
121 | 		  if(div_counter_reg < FRACTION_WIDTH + 3)  // steps 1 .. FRACTION_WIDTH + 2: one quotient bit each
122 |           begin
123 |             if(op1frac_stage_reg >= op2frac_stage_reg && op1frac_stage_reg != 0)
124 |             begin
125 |               op1frac_stage_reg <= op1frac_stage_reg - op2frac_stage_reg;
126 |               result_frac_reg <= {result_frac_reg[FRACTION_WIDTH - 1: 0], 1'b1};
127 |             end
128 |             else
129 |             begin
130 |               result_frac_reg <= {result_frac_reg[FRACTION_WIDTH - 1: 0], 1'b0};
131 |             end
132 |             op2frac_stage_reg <= op2frac_stage_reg >> 1;
133 |           end  
134 |           else  // final step: choose the special-case or the packed result and signal completion
135 |           begin
136 |             if(nan_reg)
137 |             begin
138 |               out_reg <= NAN_VALUE;
139 |             end
140 |             else if(inf_out)
141 |               begin
142 |                 out_reg <= INF_VALUE | (result_sign << SIGN_BIT);            
143 |               end
144 |               else if(zero_out)
145 |                 begin
146 |                   out_reg <= 0;
147 |                 end
148 |                 else
149 |                 begin
150 | 				  out_reg <= {result_sign, result_exp_reg[EXP_WIDTH - 1: 0], result_frac_reg[FRACTION_WIDTH: 1]};
151 |                 end
152 |             div_counter_reg <= 0;  // NOTE(review): overridden by the increment below (the last nonblocking assignment wins)
153 |             done_reg <= 1;
154 |           end
155 |         end  
156 |         div_counter_reg <= div_counter_reg + 1'b1;  
157 |       end
158 |     end
159 |   end
160 | endmodule
161 | `endif
162 | 


--------------------------------------------------------------------------------
/documentation.md:
--------------------------------------------------------------------------------
 1 | ## Documentation
 2 | 
 3 | ## Links
 4 | 
 5 | [Proposal](https://docs.google.com/document/d/1ULqsxGpxzVEamMZO5F3Pto4A9VCm7r37K5DoMaEWpnU/edit?usp=sharing)
 6 | 
 7 | [Demo Video](http://www.youtube.com)
 8 | 
 9 | ### What did I do?
10 | 
11 | I implemented a generic feedforward neural network in Verilog, with an approximated sigmoid activation function. Using weights precomputed by a backpropagation network written with Armadillo, I was able to load the parameters onto the network and verify its results on XOR classification, the most basic application that demonstrates how a neural network functions.
12 | 
13 | ### Why did I do it?
14 | 
15 | The operations that govern a neural network are, by nature, heavily parallel, whereas a CPU is mostly sequential. Modern machine-learning architectures take advantage of the massively parallel processing power of the GPU to accelerate the training and computing process, but [recent benchmarks](https://liu.diva-portal.org/smash/get/diva2:930724/FULLTEXT01.pdf) show that as more parallelism is required, FPGAs can outperform GPUs; indeed, unlike GPUs that require external computing interfaces, FPGAs can be an efficient and independent alternative for the task of training the neural network and computing predictions.
16 | 
17 | ### How did I do this?
18 | 
19 | Implementing an FPU library *and* building a neural network would have been beyond the scope of this project, so I used the FPU library from [here](https://github.com/arktur04/FPU). In retrospect, this particular choice of the FPU library was unfortunate since it wasn't very synthesis-friendly and caused conflicts when implementing on the FPGA.
20 | 
21 | ### Challenges
22 | 
23 | - Floating Point Library
24 | - Stage-Based Computation
25 | - Matrix Multiplication and Indexing
26 | - Compiling Time
27 | - Implementing on the FPGA - ambiguous clocks, non-synthesizable FPU
28 | 
29 | ### Building Upon
30 | 


--------------------------------------------------------------------------------
/experiments/allset.v:
--------------------------------------------------------------------------------
 1 | // Experiment: print a byte next to its unary AND-reduction (&) for three
 2 | // patterns, to see that the reduction is 1 only when every bit is set.
 3 | module allset();
 4 | 
 5 | reg [7:0] pattern; // byte under test
 6 | 
 7 | initial begin
 8 | 	// No bit set.
 9 | 	pattern = 8'h00;
10 | 	$display("%b %b", pattern, &pattern);
11 | 	// Upper nibble set, 50 time units later.
12 | 	#50 pattern = 8'hF0;
13 | 	$display("%b %b", pattern, &pattern);
14 | 	// Every bit set, after another 50 time units.
15 | 	#50 pattern = 8'hFF;
16 | 	$display("%b %b", pattern, &pattern);
17 | 	$finish;
18 | end
19 | 
20 | endmodule
21 | 


--------------------------------------------------------------------------------
/experiments/exp.v:
--------------------------------------------------------------------------------
  1 | module addition (s3, e3, m3, s1, s2, e1, e2, m1, m2);
  2 | // Combinational add of two operands given as sign (s), 8-bit exponent (e) and 23-bit mantissa (m) fields; result in s3/e3/m3.
  3 | output s3;
  4 | output [7:0] e3;
  5 | output [22:0] m3;
  6 | input s1, s2;
  7 | input [7:0] e1, e2;
  8 | input [22:0] m1, m2;   
  9 | // NOTE(review): a leading 1 is always prepended to both mantissas below; no branch treats zero/denormal/inf/NaN inputs specially.
 10 | wire  s3;   
 11 | wire [7:0] e3;   
 12 | wire [22:0] m3; 
 13 | wire diff;
 14 | wire [3:0] count; // declared but not referenced anywhere in this module
 15 | wire [7:0] mbuff6,counter;
 16 | wire [24:0] mbuff1, mbuff2, mbuff3,mbuff4,mbuff5;
 17 | 
 18 | // diff = 1 when operand 1 has the larger magnitude (exponent compared first, then mantissa); 0 otherwise, including a tie.
 19 | assign diff = (e1 > e2) ? 1'b1 : 
 20 | 		(e2 > e1) ?  1'b0 :
 21 | 		(m1 > m2) ? 1'b1 : 1'b0;  
 22 | // Result sign: the common sign, or the sign of the larger-magnitude operand when the signs differ.
 23 | assign s3 = (s1==s2) ? s1:
 24 |             (diff==1'b1) ? s1: s2;
 25 |   
 26 | // counter: exponent gap; mbuff1: smaller operand with leading 1; mbuff3: larger operand with leading 1; mbuff6: larger exponent minus 127.
 27 | assign counter = (diff == 0) ? e2 - e1 :  e1 - e2;
 28 | assign mbuff1 =  (diff == 0) ? {2'b01,m1} : {2'b01,m2};
 29 | assign mbuff3 =  (diff == 0) ? {2'b01,m2} : {2'b01,m1};
 30 | assign mbuff6 =  (diff == 0) ? (e2 - 8'b01111111) : (e1 - 8'b01111111); 
 31 | // Align the smaller operand to the larger one's exponent.
 32 | assign   mbuff2 = mbuff1>>counter;
 33 | 
 34 | // Same signs: add magnitudes. Different signs: subtract the smaller from the larger.
 35 | assign    mbuff4 = (s1==s2) ?  (mbuff2 + mbuff3) : (mbuff3 - mbuff2);
 36 | 
 37 | // Normalise: shift left until the highest set bit has just left the 25-bit vector (bit k -> shift by 25-k).
 38 | assign mbuff5= (mbuff4[24]==1) ? (mbuff4 << 1'b1):
 39 |                (mbuff4[23]==1) ? (mbuff4 << 2'b10):
 40 |                (mbuff4[22]==1) ? (mbuff4 << 2'b11):
 41 |                (mbuff4[21]==1) ? (mbuff4 << 3'b100):
 42 | 	       (mbuff4[20]==1) ? (mbuff4 << 3'b101):	
 43 |  	       (mbuff4[19]==1) ? (mbuff4 << 3'b110):
 44 |    	       (mbuff4[18]==1) ? (mbuff4 << 3'b111):
 45 | 	       (mbuff4[17]==1) ? (mbuff4 << 4'b1000):
 46 | 	       (mbuff4[16]==1) ? (mbuff4 << 4'b1001):
 47 | 	       (mbuff4[15]==1) ? (mbuff4 << 4'b1010):
 48 |  	       (mbuff4[14]==1) ? (mbuff4 << 4'b1011):
 49 |                (mbuff4[13]==1) ? (mbuff4 << 4'b1100):
 50 | 	       (mbuff4[12]==1) ? (mbuff4 << 4'b1101):
 51 |  	       (mbuff4[11]==1) ? (mbuff4 << 4'b1110):
 52 |   	       (mbuff4[10]==1) ? (mbuff4 << 4'b1111):
 53 | 	       (mbuff4[9]==1) ? (mbuff4 << 5'b10000):
 54 |                (mbuff4[8]==1) ? (mbuff4 << 5'b10001):
 55 | 	       (mbuff4[7]==1) ? (mbuff4 << 5'b10010):
 56 |  	       (mbuff4[6]==1) ? (mbuff4 << 5'b10011):
 57 |  	       (mbuff4[5]==1) ? (mbuff4 << 5'b10100):
 58 |                (mbuff4[4]==1) ? (mbuff4 << 5'b10101):
 59 | 	       (mbuff4[3]==1) ? (mbuff4 << 5'b10110):
 60 |                (mbuff4[2]==1) ? (mbuff4 << 5'b10111):
 61 |                (mbuff4[1]==1) ? (mbuff4 << 5'b11000):
 62 |                (mbuff4[0]==1) ? (mbuff4 << 1): 25'b0000000000000000000000000; // shift is 1, not 25, but mbuff4 is exactly 1 here so bits [24:2] are zero either way
 63 | 
 64 | 
 65 | // Result exponent: larger exponent +1 if the sum carried into bit 24, unchanged for bit 23, minus k when the top set bit is 23-k; 0 when the sum is zero.
 66 | assign e3= (mbuff4[24]==1) ? (mbuff6 + 8'b10000000):
 67 |            (mbuff4[23]==1) ? (mbuff6 + 8'b01111111):
 68 |            (mbuff4[22]==1) ? (mbuff6 + 8'b01111111 - 1'b1):
 69 |            (mbuff4[21]==1) ? (mbuff6 + 8'b01111111 - 2'b10):
 70 |            (mbuff4[20]==1) ? (mbuff6 + 8'b01111111 - 2'b11):
 71 |            (mbuff4[19]==1) ? (mbuff6 + 8'b01111111 - 3'b100):
 72 |            (mbuff4[18]==1) ? (mbuff6 + 8'b01111111 - 3'b101):
 73 |            (mbuff4[17]==1) ? (mbuff6 + 8'b01111111 - 3'b110):
 74 |            (mbuff4[16]==1) ? (mbuff6 + 8'b01111111 - 3'b111):
 75 |            (mbuff4[15]==1) ? (mbuff6 + 8'b01111111 - 4'b1000):
 76 |            (mbuff4[14]==1) ? (mbuff6 + 8'b01111111 - 4'b1001):
 77 |            (mbuff4[13]==1) ? (mbuff6 + 8'b01111111 - 4'b1010):      
 78 |            (mbuff4[12]==1) ? (mbuff6 + 8'b01111111 - 4'b1011):
 79 |            (mbuff4[11]==1) ? (mbuff6 + 8'b01111111 - 4'b1100):
 80 |            (mbuff4[10]==1) ? (mbuff6 + 8'b01111111 - 4'b1101):
 81 |            (mbuff4[9]==1) ? (mbuff6 + 8'b01111111 - 4'b1110):
 82 |            (mbuff4[8]==1) ? (mbuff6 + 8'b01111111 - 4'b1111):
 83 |            (mbuff4[7]==1) ? (mbuff6 + 8'b01111111 - 5'b10000):
 84 |            (mbuff4[6]==1) ? (mbuff6 + 8'b01111111 - 5'b10001):
 85 |            (mbuff4[5]==1) ? (mbuff6 + 8'b01111111 - 5'b10010):
 86 |            (mbuff4[4]==1) ? (mbuff6 + 8'b01111111 - 5'b10011):
 87 |            (mbuff4[3]==1) ? (mbuff6 + 8'b01111111 - 5'b10100): 
 88 |            (mbuff4[2]==1) ? (mbuff6 + 8'b01111111 - 5'b10101):
 89 |            (mbuff4[1]==1) ? (mbuff6 + 8'b01111111 - 5'b10110):
 90 |            (mbuff4[0]==1) ? (mbuff6 + 8'b01111111 - 5'b10111):8'b00000000;
 91 | // The mantissa is the top 23 bits left after normalisation; the two lowest bits are dropped without rounding.
 92 | assign m3= mbuff5[24:2];
 93 | 
 94 | endmodule
 95 |   
 96 | 
 97 | module multiplication (s3, e3, m3, s1, s2, e1, e2, m1, m2);
 98 | // Combinational multiply of two operands given as sign / 8-bit exponent / 23-bit mantissa fields.
 99 | output s3;
100 | output [7:0] e3;
101 | output [22:0] m3;
102 | input s1, s2; 
103 | input [7:0] e1, e2; 
104 | input [22:0] m1, m2;
105 | // An operand whose exponent field is 0 is treated as zero and forces an all-zero (+0) result.
106 | wire [7:0] mbuff2, mbuff3,mbuff4, count;
107 | wire [23:0] imply1, imply2;
108 | wire [47:0] mbuff1, mbuff5; 
109 | // Exponents with the bias of 127 removed, and their sum.
110 | assign mbuff3= e1-8'b01111111;
111 | assign mbuff4= e2-8'b01111111;
112 | 
113 | assign mbuff2= mbuff3 + mbuff4;
114 | // Significands with the implicit leading 1 made explicit.
115 | assign imply1= {1'b1,m1};
116 | assign imply2= {1'b1,m2};
117 | // 24 x 24 -> 48-bit product; its leading 1 sits at bit 47 or bit 46.
118 | assign mbuff1= imply1 * imply2;       
119 | // Shift needed to push that leading 1 out of the top of the vector.
120 | assign count= ( mbuff1[47] == 1) ?  8'b00000001 : 8'b00000010;
121 | 
122 | assign mbuff5= mbuff1 << count;         
123 | // Exponent: sum of unbiased exponents, +1 when the product reached bit 47, re-biased.
124 | assign e3 = (e1==8'b00000000) ? 8'b00000000:
125 |             (e2==8'b00000000) ? 8'b00000000:mbuff2 - count +8'b00000010+8'b01111111;
126 | // Mantissa: the 23 bits below the leading 1 (truncated, not rounded).
127 | assign m3 = (e1==8'b00000000) ? 23'b0000000000000000000000: 
128 |             (e2==8'b00000000) ? 23'b0000000000000000000000:mbuff5 [47:25];
129 | // Sign: XOR of the input signs, forced to 0 when either operand is zero (the second guard must test e2, matching e3 and m3).
130 | assign s3 = (e1==8'b00000000) ? 1'b0:
131 |             (e2==8'b00000000) ? 1'b0: s1 ^ s2;
132 | 
133 | endmodule
134 | 
135 | 
136 | module compare (flag, s1, s2, e1, e2, m1, m2);
137 | // Three-way comparison of two sign/exponent/mantissa operands. flag: 3'b100 first > second, 3'b010 equal, 3'b001 first < second.
138 | output [2:0] flag;
139 | input s1, s2;
140 | input [7:0] e1, e2;
141 | input [22:0] m1, m2;
142 | // sign = {s1,s2}; exp / mag: 2'b10 first field larger, 2'b01 second larger, 2'b00 equal (mag is 2'b11 when the exponents differ).
143 | wire [1:0] sign, exp, mag;                                   
144 | 
145 | assign sign= {s1,s2};
146 | 
147 | assign exp= (e1 > e2) ? 2'b10:
148 |             (e2 > e1) ? 2'b01: 2'b00;
149 | 
150 | assign mag= (exp == 2'b00) ? ((m1 > m2) ? 2'b10:
151 |                                 (m2 > m1) ? 2'b01: 2'b00): 2'b11;
152 | 
153 | // Both positive: the larger magnitude is the larger value. Both negative: the larger magnitude is the smaller value.
154 | // Mixed signs: the positive operand is the larger one, so {s1,s2} == 2'b01 (first positive, second negative) means "first greater".
155 | // NOTE(review): signs are compared literally, so +0 and -0 are reported as unequal.
156 | assign flag= (sign == 2'b00) ? ((exp == 2'b10) ? 3'b100:
157 |                                 (exp == 2'b01) ? 3'b001:
158 |        				(mag == 2'b10) ? 3'b100:
159 | 				(mag == 2'b01) ? 3'b001:3'b010):
160 |              (sign == 2'b11) ? ((exp == 2'b10) ? 3'b001:
161 |                                 (exp == 2'b01) ? 3'b100: 
162 |                                 (mag == 2'b10) ? 3'b001:
163 |                                 (mag == 2'b01) ? 3'b100:3'b010):
164 |              (sign == 2'b01) ? 3'b100 : 3'b001;
165 | 
166 | 
167 | endmodule
168 | 
169 | module modulo32 (s2, e2, m2, s1, e1, m1);
170 | // Keeps the low five bits of the operand's integer part (i.e. the integer part modulo 32) and re-encodes them as sign/exponent/mantissa.
171 | output s2;
172 | output [7:0] e2;
173 | output [22:0] m2;
174 | input s1;
175 | input [7:0] e1;
176 | input [22:0] m1;
177 | 
178 | wire [7:0] mbuff3, count;
179 | wire [23:0] imp1,imp2 ,imp3 ,imp4 ,imp5 ,imp6 ,n1 ,n2;                  // imp3..imp6 are declared but not referenced in this module
180 | 
181 | assign s2=s1;
182 | assign mbuff3= e1-8'b01111111; // exponent with the bias of 127 removed (wraps modulo 256)
183 | assign imp1= {1'b1,m1}; // significand with explicit leading 1
184 | // count = |unbiased exponent - 23|: distance between the binary point and bit 0 of imp1.
185 | assign count= (mbuff3 > 8'b00010111) ? (mbuff3 - 8'b00010111):
186 |                                        (8'b00010111 - mbuff3);
187 | // Shift so that bit 0 of imp2 has weight 2^0; the comparison is unsigned, so wrapped (negative) exponents also take the left-shift arm.
188 | assign imp2= (mbuff3 > 8'b00010111) ? (imp1 << count):(imp1 >> count);
189 | // Low five integer bits; forced to 0 when bit 7 of the unbiased exponent is set.
190 | assign n1= (mbuff3[7] == 1'b1) ? 24'b000000000000000000000000 : 
191 |                                 (24'b000000000000000000011111 & imp2);
192 | // Exponent from the position of the highest set bit of n1 (127 + bit index); 0 when n1 is 0.
193 | assign e2= (n1[4] == 1) ? 8'b10000011:
194 |            (n1[3] == 1) ? 8'b10000010: 
195 |            (n1[2] == 1) ? 8'b10000001:
196 |            (n1[1] == 1) ? 8'b10000000:
197 |            (n1[0] == 1) ? 8'b01111111: 8'b00000000; 
198 | // Move that highest set bit up to bit 23 (shift by 23 - bit index).
199 | assign n2= (n1[4] == 1) ? n1 << 8'b00010011:
200 |            (n1[3] == 1) ? n1 << 8'b00010100:
201 |            (n1[2] == 1) ? n1 << 8'b00010101:
202 |            (n1[1] == 1) ? n1 << 8'b00010110:
203 |            (n1[0] == 1) ? n1 << 8'b00010111: 24'b000000000000000000000000;
204 | 
205 | // Drop the leading 1 at bit 23 to form the stored mantissa.
206 | assign m2= n2[22:0];
207 | 
208 | endmodule
209 | 
210 | 
211 | module round (s2, e2, m2, s1, e1, m1);
212 | // Rounds a sign/exponent/mantissa operand to an integer-valued one (the bit of weight 1/2 is added into the integer part).
213 | output s2;
214 | output [7:0] e2;
215 | output [22:0] m2;
216 | input s1;
217 | input [7:0] e1;
218 | input [22:0] m1;
219 | 
220 | wire mbuff1,mbuff2;
221 | wire [7:0] mbuff3, count, X, w;
222 | wire [23:0] imp1,imp2,imp3,imp4,imp5,imp6,imp7,a;
223 | // mbuff1, mbuff2 and imp7 are declared but not referenced in this module.
224 | 
225 | assign mbuff3= (e1 - 8'b01111111); // exponent with the bias of 127 removed (wraps modulo 256)
226 | 
227 | assign imp1= {1'b1,m1}; // significand with explicit leading 1
228 | 
229 | assign count = (8'b00010111 - mbuff3); // number of fraction bits held in imp1
230 | 
231 | assign imp2 = imp1 >> (count-8'b00000001); // integer part followed by the 1/2 bit
232 | 
233 | assign imp3= imp2 & 24'b00000000000000000000001; // the 1/2 bit on its own
234 | 
235 | assign imp4= imp2 >> 1; // truncated integer part
236 | 
237 | assign imp5= imp4 + imp3; // integer part rounded up when the 1/2 bit is set
238 | 
239 | assign   X = mbuff3+ 8'b00000001; // bit of imp5 that is set only when rounding carried into a new leading bit
240 | // Exponent: one higher when rounding carried out, unchanged otherwise; e1 is passed through when the (unsigned) test mbuff3 < 23 fails.
241 | assign  w= ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b1) ) ?  (mbuff3 +8'b00000001 + 8'b01111111 ) :
242 |            ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b0) ) ?  (mbuff3 +8'b01111111) : e1;
243 | // Shift the rounded integer back so that its leading 1 returns to bit 23 (one position less after a carry).
244 | assign imp6 = ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b1) ) ?
245 |                 (imp5 << count- 8'b00000001):
246 |              ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b0) ) ? ( imp5<<count): imp5;
247 | 
248 | // Mantissa: take imp6 in both rounded cases (carry and no carry), matching w and imp6 above; m1 passes through only when mbuff3 < 23 fails.
249 | assign a = ( (mbuff3< 8'b00010111) & ( imp5[X]== 1'b1) ) ?  imp6[22:0]:
250 |             ( (mbuff3< 8'b00010111) & ( imp5[X]==1'b0) ) ? imp6[22:0]:m1;
251 | 
252 | // Exponent fields below 126 give an all-zero result. NOTE(review): e1 == 126 wraps mbuff3 to 8'hFF, fails mbuff3 < 23 and so returns the input unchanged.
253 | assign s2= (e1 < 8'b01111110) ? 1'b0: s1;
254 | 
255 | assign e2= (e1 < 8'b01111110) ? 8'b00000000: w;
256 | 
257 | assign m2= (e1 < 8'b01111110) ? 23'b00000000000000000000000: a;    
258 | 
259 | 
260 |  
261 | endmodule
262 | 
263 | 
264 | 
265 | module powertwo (s3, e3, m3, s1, e1, m1);
266 | // Builds 2^n as sign/exponent/mantissa, where n is the 8-bit integer part of the operand (two's-complement negated when s1 is set).
267 | output s3;
268 | output [7:0] e3;
269 | output [22:0] m3;
270 | input s1;
271 | input [7:0] e1;
272 | input [22:0] m1;
273 | 
274 | wire [7:0] mbuff, exp;
275 | wire [30:0] mag; // 31 bits: must hold all of {8'b00000001, m1} because the selects below reach up to bit 30
276 |                                  
277 | assign exp = ( e1 - 8'b01111111 ); // exponent with the bias of 127 removed
278 | assign mag = {8'b00000001 , m1}; // leading 1 at bit 23 with seven zero bits above it
279 | // The 8-bit window slides up by one for each step down in exponent, so mbuff is the integer part for exponents 7..0; any other exponent falls into the last arm.
280 | assign mbuff =(s1==0)? ((exp == 8'b00000111) ? mag[23:16]:
281 |                         (exp == 8'b00000110) ? mag[24:17]:
282 |                         (exp == 8'b00000101) ? mag[25:18]:
283 |                         (exp == 8'b00000100) ? mag[26:19]:
284 |                         (exp == 8'b00000011) ? mag[27:20]:
285 |                         (exp == 8'b00000010) ? mag[28:21]: 
286 |                         (exp == 8'b00000001) ? mag[29:22]: mag[30:23]):
287 |                         ((exp == 8'b00000111) ? mag[23:16]:
288 |                         (exp == 8'b00000110) ? (8'b00000000 - mag[24:17]):
289 |                         (exp == 8'b00000101) ? (8'b00000000 - mag[25:18]):
290 |                         (exp == 8'b00000100) ? (8'b00000000 - mag[26:19]):
291 |                         (exp == 8'b00000011) ? (8'b00000000 - mag[27:20]):
292 |                         (exp == 8'b00000010) ? (8'b00000000 - mag[28:21]):
293 |                         (exp == 8'b00000001) ? (8'b00000000 - mag[29:22]):
294 |                                (8'b00000000 - mag[30:23]));          
295 | // Result is +1.0 * 2^mbuff; an exponent field of 0 on the input yields 1.0.
296 | assign s3 = 1'b0;
297 | assign e3 =(e1 == 8'b00000000) ? 8'b01111111 : mbuff + 8'b01111111;
298 | assign m3 = 23'b00000000000000000000000; 
299 |                 
300 | endmodule                                                            
301 | 
302 | 
303 | 
304 | 
305 | 
306 | 
307 | 
308 | 
309 | 
310 | 
311 | 
312 | 
313 | module get_j (j, s1, e1, m1);
314 | // Picks a 5-bit field j out of the operand's significand; which bits are taken depends on the exponent.
315 | output [4:0]  j;
316 | input s1;              // part of the port list but not read in this module
317 | input [7:0] e1;
318 | input [22:0] m1;
319 | 
320 | // Exponent with the bias of 127 removed (wraps modulo 256).
321 | wire [7:0] unbiased = e1 - 8'd127;
322 | // Mantissa with a leading 1 at bit 23 and four zero bits above it.
323 | wire [27:0] padded = {5'd1, m1};
324 | 
325 | // Each step down in exponent moves the 5-bit window one position up;
326 | // unbiased exponents other than 0..4 give j = 0.
327 | assign j = (unbiased == 8'd4) ? padded[23:19] :
328 |            (unbiased == 8'd3) ? padded[24:20] :
329 |            (unbiased == 8'd2) ? padded[25:21] :
330 |            (unbiased == 8'd1) ? padded[26:22] :
331 |            (unbiased == 8'd0) ? padded[27:23] : 5'd0;
332 | 
333 | endmodule
334 | 
335 | module divide(s2, e2, m2, s1, e1, m1);
336 | // Scales the operand by 2^-5 by lowering its exponent field; sign and mantissa pass straight through.
337 | output s2;
338 | output [7:0] e2;
339 | output [22:0] m2;
340 | input s1;
341 | input [7:0] e1;
342 | input [22:0] m1;
343 | 
344 | // Sign and mantissa are forwarded unchanged.
345 | assign s2 = s1;
346 | assign m2 = m1;
347 | 
348 | // Exponent fields of 131 or less are replaced by 0 rather than reduced.
349 | assign e2 = (e1 <= 8'd131) ? 8'd0 : (e1 - 8'd5);
350 | 
351 | endmodule
352 | 
353 | 
354 | 
355 | module program (outs ,oute ,outm ,xs ,xe ,xm );
356 | // Top-level datapath of this experiment: maps the operand xs/xe/xm to outs/oute/outm through the arithmetic modules above. NOTE(review): the constants and structure look like a table-driven exponential (inv ~ 32/ln 2, l1 ~ ln 2 / 32) - confirm.
357 | output outs;
358 | output [7:0] oute;
359 | output [22:0] outm;
360 | input xs;
361 | input [7:0] xe;
362 | input [22:0] xm;
363 | // Every intermediate value is carried as three nets: <name>s (sign), <name>e (exponent), <name>m (mantissa).
364 | wire lows,highs,ones,invs,ns,twoe9s,n1s,n2s,n2as,r1s,r1as,r1bs,r2s,l1s,l2s;
365 | wire a1s,a2s,ms,qs,ss,ps,rs,sleads,strails,e1s,nums;
366 | wire stemp19,stemp20,stemp21;
367 | wire stemp,stemp1,stemp2,stemp3,stemp4,stemp5,stemp6,stemp7,stemp8,stemp9;
368 | wire stemp10,stemp11,stemp12,stemp13,stemp14,stemp15,stemp16,stemp17,stemp18;
369 | wire [2:0] flag, flag2,flag3;
370 | wire [4:0] j;
371 | wire [7:0] lowe,highe,onee,inve,ne,twoe9e,n1e,n2e,n2ae,r1e,r1ae,r1be,r2e,l1e,l2e;
372 | wire [7:0] a1e,a2e,me,qe,se,pe,re,sleade,straile,e1e,nume,etemp19;
373 | wire [7:0] etemp,etemp1,etemp2,etemp3,etemp4,etemp5,etemp6,etemp7,etemp8,etemp9; 
374 | wire [7:0] etemp10,etemp11,etemp12,etemp13,etemp14,etemp15,etemp16,etemp17,etemp18;
375 | wire [22:0] lowm,highm,onem,invm,nm,twoe9m,n1m,n2m,n2am,r1m,r1am,r1bm,r2m,l1m,l2m;
376 | wire [22:0] a1m,a2m,mm,qm,sm,pm,rm,sleadm,strailm,e1m,numm,mtemp19;
377 | wire [22:0] mtemp,mtemp1,mtemp2,mtemp3,mtemp4,mtemp5,mtemp6,mtemp7,mtemp8,mtemp9; 
378 | wire [22:0] mtemp10,mtemp11,mtemp12,mtemp13,mtemp14,mtemp15,mtemp16,mtemp17,mtemp18;
379 | // Constants. one*, low* and high* are referenced only by the commented-out block near the end of the module.
380 | assign ones= 1'b0;
381 | assign onee= 8'b01111111;
382 | assign onem= 23'b00000000000000000000000;
383 | 
384 | assign lows= 1'b0;
385 | assign lowe= 8'b01100110;
386 | assign lowm= 23'b00000000000000000000000;
387 | 
388 | assign highs= 1'b0;
389 | assign highe= 8'b10000110;
390 | assign highm= 23'b10111000110101110111010;
391 | // num: exponent field 132 with zero mantissa, i.e. 2^5 = 32.
392 | assign nums=1'b0;
393 | assign nume=8'b10000100;
394 | assign numm=23'b00000000000000000000000;
395 | // inv: multiplier applied to the input by mul1.
396 | assign invs=1'b0;
397 | assign inve=8'b10000100;
398 | assign invm=23'b01110001010101000111011;
399 | // twoe9: exponent field 136 with zero mantissa, i.e. 2^9; threshold compared against |n| by comp1.
400 | assign twoe9s=1'b0;
401 | assign twoe9e=8'b10001000;
402 | assign twoe9m=23'b00000000000000000000000;
403 | 
404 | assign l1s=1'b0;
405 | assign l1e=8'b01111001;
406 | assign l1m=23'b01100010111001000000000;
407 | 
408 | assign l2s=1'b0;
409 | assign l2e=8'b01100110;
410 | assign l2m=23'b01111111011111010001110;
411 | 
412 | assign a1s=1'b0;
413 | assign a1e=8'b01111110;
414 | assign a1m=23'b01010101010101011101100;
415 | 
416 | // n = round(x * inv); n2 = n mod 32 (32 is added first when n is negative); n1 = n - n2.
417 | multiplication mul1(stemp,etemp,mtemp,invs,xs,inve,xe,invm,xm);
418 | round rou1(ns,ne,nm,stemp,etemp,mtemp);
419 | modulo32 mod1(stemp1,etemp1,mtemp1,ns,ne,nm);
420 | 
421 |  
422 | addition add1(n2as,n2ae,n2am,nums,stemp1,nume,etemp1,numm,mtemp1);
423 | assign n2s= (ns==1'b1) ? n2as : stemp1;
424 | assign n2e= (ns==1'b1) ? n2ae : etemp1;
425 | assign n2m= (ns==1'b1) ? n2am : mtemp1;
426 | 
427 | 
428 | assign stemp2= ~n2s;
429 | addition add2(n1s,n1e,n1m,ns,stemp2,ne,n2e,nm,n2m);
430 | assign stemp21= 1'b0;
431 | compare comp1(flag2,stemp21,twoe9s,ne,twoe9e,nm,twoe9m);
432 | // r1 has two candidates: r1a = x - n*l1 and r1b = (x - n1*l1 - n2)*l1; flag2 == 3'b001 (|n| below 2^9) selects r1a.
433 | multiplication mul2(stemp3,etemp3,mtemp3,ns,l1s,ne,l1e,nm,l1m);
434 | assign stemp4= ~stemp3;
435 | addition add3(r1as,r1ae,r1am,stemp4,xs,etemp3,xe,mtemp3,xm);
436 | 
437 | multiplication mul3(stemp5,etemp5,mtemp5,n1s,l1s,n1e,l1e,n1m,l1m);
438 | assign stemp6= ~stemp5;
439 | addition add4(stemp7,etemp7,mtemp7,stemp6,xs,etemp5,xe,mtemp5,xm);
440 | assign stemp8= ~n2s;
441 | addition add5(stemp9,etemp9,mtemp9,stemp8,stemp7,n2e,etemp7,n2m,mtemp7);
442 | multiplication mul4(r1bs,r1be,r1bm,stemp9,l1s,etemp9,l1e,mtemp9,l1m);
443 | assign r1s= (flag2 == 3'b001) ? r1as : r1bs;
444 | assign r1e= (flag2 == 3'b001) ? r1ae : r1be;
445 | assign r1m= (flag2 == 3'b001) ? r1am : r1bm;
446 | // r2 = -n*l2; m = n1 scaled by `divide`; r = r1 + r2; q = r*r*(r*a2 + a1); p = r1 + (r2 + q).
447 | assign stemp10= ~ns;
448 | multiplication mul5(r2s,r2e,r2m,stemp10,l2s,ne,l2e,nm,l2m);
449 | divide d1(ms,me,mm,n1s,n1e,n1m);
450 | addition add6(rs,re,rm,r1s,r2s,r1e,r2e,r1m,r2m);
451 | multiplication mul6(stemp11,etemp11,mtemp11,rs,a2s,re,a2e,rm,a2m); // NOTE(review): a2s/a2e/a2m are declared above but no assign in this module drives them
452 | addition add7(stemp12,etemp12,mtemp12,stemp11,a1s,etemp11,a1e,mtemp11,a1m);
453 | multiplication mul7(stemp13,etemp13,mtemp13,rs,rs,re,re,rm,rm);
454 | multiplication  mul8(qs,qe,qm,stemp13,stemp12,etemp13,etemp12,mtemp13,mtemp12);
455 | addition add8(stemp14,etemp14,mtemp14,r2s,qs,r2e,qe,r2m,qm);
456 | addition add9(ps,pe,pm,stemp14,r1s,etemp14,r1e,mtemp14,r1m);
457 | get_j get1(j,n2s,n2e,n2m);
458 | 
459 | 
460 | // Two 32-entry tables indexed by j: a "lead" value (exponent fixed at 127, mantissa sleadm) and a "trail" value (straile/strailm); both are positive.
461 | assign sleads = 1'b0;
462 | assign strails = 1'b0;
463 | 
464 | assign sleade = 8'b01111111;
465 |  
466 | assign straile= (j == 5'b00000) ?  8'b00000000:
467 | 		(j == 5'b00001) ?  8'b01101010:
468 | 		(j == 5'b00010) ?  8'b01101001:
469 | 		(j == 5'b00011) ? 8'b01101011:
470 | 		(j == 5'b00100) ? 8'b01101000:
471 | 		(j == 5'b00101) ? 8'b01101101:
472 |                 (j == 5'b00110) ? 8'b01101100:
473 |                 (j == 5'b00111) ? 8'b01101101:
474 |                 (j == 5'b01000) ? 8'b01101101:
475 |                 (j == 5'b01001) ? 8'b01101101:
476 | 		(j == 5'b01010) ? 8'b01101101:
477 |                 (j == 5'b01011) ? 8'b01101001:
478 |                 (j == 5'b01100) ? 8'b01101100:
479 |                 (j == 5'b01101) ? 8'b01101100:
480 |                 (j == 5'b01110) ? 8'b01101101:
481 |                 (j == 5'b01111) ? 8'b01101101:
482 |                 (j == 5'b10000) ? 8'b01101101:
483 |                 (j == 5'b10001) ? 8'b01101101:
484 |                 (j == 5'b10010) ? 8'b01101101:
485 |                 (j == 5'b10011) ? 8'b01101011:
486 |                 (j == 5'b10100) ? 8'b01101101:
487 |                 (j == 5'b10101) ? 8'b01101101:
488 |                 (j == 5'b10110) ? 8'b01101011:
489 |                 (j == 5'b10111) ? 8'b01101100:
490 |                 (j == 5'b11000) ? 8'b01101101:
491 |                 (j == 5'b11001) ? 8'b01101101:
492 |                 (j == 5'b11010) ? 8'b01101100:
493 |                 (j == 5'b11011) ? 8'b01101010:
494 |                 (j == 5'b11100) ? 8'b01101010:
495 |                 (j == 5'b11101) ? 8'b01101101:
496 |                 (j == 5'b11110) ? 8'b01101101: 8'b01101101;
497 | 
498 | assign sleadm = (j == 5'b00000) ? 23'b00000000000000000000000:
499 |                 (j == 5'b00001) ? 23'b00000101100110110000000:
500 |                 (j == 5'b00010) ? 23'b00001011010101011000000:
501 |                 (j == 5'b00011) ? 23'b00010001001100000000000:
502 |                 (j == 5'b00100) ? 23'b00010111001010111000000:
503 |                 (j == 5'b00101) ? 23'b00011101010010000000000:
504 |                 (j == 5'b00110) ? 23'b00100011100001111000000:
505 |                 (j == 5'b00111) ? 23'b00101001111010011000000:
506 |                 (j == 5'b01000) ? 23'b00110000011011111000000:
507 |                 (j == 5'b01001) ? 23'b00110111000110100000000:
508 |                 (j == 5'b01010) ? 23'b00111101111010100000000:
509 |                 (j == 5'b01011) ? 23'b01000100111000001000000:
510 |                 (j == 5'b01100) ? 23'b01001011111111011000000:
511 |                 (j == 5'b01101) ? 23'b01010011010000101000000:
512 |                 (j == 5'b01110) ? 23'b01011010101100000000000:
513 |                 (j == 5'b01111) ? 23'b01100010010001111000000:
514 |                 (j == 5'b10000) ? 23'b01101010000010011000000:
515 |                 (j == 5'b10001) ? 23'b01110001111101110000000:
516 |                 (j == 5'b10010) ? 23'b01111010000100010000000:  
517 | 		(j == 5'b10011) ? 23'b10000010010110001000000:
518 |                 (j == 5'b10100) ? 23'b10001010110011100000000:
519 |                 (j == 5'b10101) ? 23'b10010011011100110000000:
520 |                 (j == 5'b10110) ? 23'b10011100010010010000000:
521 |                 (j == 5'b10111) ? 23'b10100101010100000000000:
522 |                 (j == 5'b11000) ? 23'b10101110100010011000000:
523 |                 (j == 5'b11001) ? 23'b10110111111101110000000:
524 |                 (j == 5'b11010) ? 23'b11000001100110011000000:
525 |                 (j == 5'b11011) ? 23'b11001011011100100000000:
526 |                 (j == 5'b11100) ? 23'b11010101100000011000000: 
527 | 		(j == 5'b11101) ? 23'b11011111110010010000000:
528 |                 (j == 5'b11110) ? 23'b11101010010010101000000:
529 |                                   23'b11110101000001110000000;
530 | 
531 | assign strailm= (j == 5'b00000) ? 23'b00000000000000000000000:
532 |                 (j == 5'b00001) ? 23'b10100110001010110000101:
533 |                 (j == 5'b00010) ? 23'b10110011111001100010010:
534 |                 (j == 5'b00011) ? 23'b11010000000100100101110:
535 |                 (j == 5'b00100) ? 23'b11100011111010101000110:
536 |                 (j == 5'b00101) ? 23'b11001100010110100010111:
537 |                 (j == 5'b00110) ? 23'b00110111001110101011001:
538 |                 (j == 5'b00111) ? 23'b01111101010001111111100:
539 |                 (j == 5'b01000) ? 23'b10000010100011000110111:
540 |                 (j == 5'b01001) ? 23'b11001101110011101010101:
541 |                 (j == 5'b01010) ? 23'b10010011000001001000111:
542 |                 (j == 5'b01011) ? 23'b10000001100001100010010:
543 |                 (j == 5'b01100) ? 23'b01101010100110110001011:
544 |                 (j == 5'b01101) ? 23'b10101011010011101010100:
545 |                 (j == 5'b01110) ? 23'b11110111010100100001011:
546 |                 (j == 5'b01111) ? 23'b10101100000011101001011:
547 |                 (j == 5'b10000) ? 23'b10011001100111111100111:
548 |                 (j == 5'b10001) ? 23'b01111010001110110001100:
549 |                 (j == 5'b10010) ? 23'b00011100111110101100000:
550 |                 (j == 5'b10011) ? 23'b10011001010011001100111:
551 |                 (j == 5'b10100) ? 23'b01010000100010101010100:
552 |                 (j == 5'b10101) ? 23'b11101100001100110111001:
553 |                 (j == 5'b10110) ? 23'b10000010101000111111000:
554 |                 (j == 5'b10111) ? 23'b11011001000111110001001:
555 |                 (j == 5'b11000) ? 23'b11100110010101101011010:
556 |                 (j == 5'b11001) ? 23'b10111100101111101101100:
557 |                 (j == 5'b11010) ? 23'b11101110110000101010101:   
558 | 		(j == 5'b11011) ? 23'b10111001110111110010000:
559 |                 (j == 5'b11100) ? 23'b10111001111101110100101:
560 |                 (j == 5'b11101) ? 23'b11001100110111101110011:
561 |                 (j == 5'b11110) ? 23'b11101000101010010010010:
562 |                                   23'b10010110110110111001001;     
563 | 
564 | 
565 | // s = lead + trail; e1 = lead + (trail + s*p); result = 2^m (from powertwo) * e1.
566 | addition add10(ss,se,sm,sleads,strails,sleade,straile,sleadm,strailm);
567 | multiplication mul9(stemp15,etemp15,mtemp15,ss,ps,se,pe,sm,pm);
568 | addition add11(stemp16,etemp16,mtemp16,stemp15,strails,etemp15,straile,mtemp15,strailm);
569 | addition add12(e1s,e1e,e1m,sleads,stemp16,sleade,etemp16,sleadm,mtemp16);
570 | powertwo p1(stemp17,etemp17,mtemp17,ms,me,mm);
571 | multiplication mul10(stemp18,etemp18,mtemp18,stemp17,e1s,etemp17,e1e,mtemp17,e1m);
572 | /*
573 | assign stemp20= 1'b0;
574 | 
575 | compare comp2(flag,stemp20,highs,xe,highe,xm,highm);
576 | compare comp3(flag3,stemp20,lows,xe,lowe,xm,lowm);
577 | addition add13(stemp19,etemp19,mtemp19,ones,xs,onee,xe,onem,xm);
578 | 
579 | assign outs=(xe == 8'b11111111) ?
580 |             ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 1'b0:1'b0):
581 |              "x"):(flag == 3'b001) ?  stemp19 :
582 |                    (flag3 == 3'b100) ? 1'b0 : stemp18;
583 | 
584 | assign oute=(xe == 8'b11111111) ?
585 |             ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 8'b11111111:8'b00000000):
586 |              "xxxxxxxx"):(flag == 3'b001) ?  etemp19 :
587 |                    (flag3 == 3'b100) ? 8'b11111111 : etemp18;  
588 | 
589 | assign outm=(xe == 8'b11111111) ?
590 |             ((xm == 23'b00000000000000000000000)? ((xs==1'b0)? 23'b00000000000000000000000:23'b00000000000000000000000):
591 |              "xxxxxxxxxxxxxxxxxxxxxxx"):(flag == 3'b001) ?  mtemp19 :
592 |                    (flag3 == 3'b100) ? 23'b00000000000000000000000:mtemp18;  
593 | 
594 | */
595 | assign outs= stemp18; // with the special-case selection above commented out, the outputs come straight from mul10
596 | assign oute= etemp18;
597 | assign outm= mtemp18;
598 | 
599 | 
600 | endmodule
601 | 
602 | 
603 | module test_exp();
604 | // Smoke test: feeds one constant through `program` and prints the raw 32-bit result.
605 | reg  [31:0] x; // stimulus word, split below as {sign, exponent[7:0], mantissa[22:0]}
606 | wire [31:0] y; // response word, same split
607 | 
608 | // Field views of the stimulus (not connected to the DUT; they only mirror x).
609 | wire        xs = x[31];
610 | wire [7:0]  xe = x[30:23];
611 | wire [22:0] xm = x[22:0];
612 | 
613 | // Field views of the response.
614 | wire        ys = y[31];
615 | wire [7:0]  ye = y[30:23];
616 | wire [22:0] ym = y[22:0];
617 | 
618 | // Device under test, connected by position: the three outputs, then the three inputs.
619 | program p(y[31], y[30:23], y[22:0],
620 |           x[31], x[30:23], x[22:0]);
621 | 
622 | initial begin
623 | 	$dumpfile("exp.vcd");
624 | 	$dumpvars(0, test_exp);
625 | 	x = 32'h38a00000;
626 | 	// Wait 50000 time units, then print the response.
627 | 	#50000 $display("%H", y);
628 | end
629 | 
630 | // There is no $finish: the run ends once no events remain.
631 | 
632 | 
633 | 
634 | endmodule
635 | 


--------------------------------------------------------------------------------
/filters/net.filter:
--------------------------------------------------------------------------------
1 | 000 L1_S
2 | 001 L1
3 | 010 L2_S
4 | 011 L2
5 | 100 DONE
6 | 


--------------------------------------------------------------------------------
/filters/sigmoid.filter:
--------------------------------------------------------------------------------
 1 | 0 ADD_START
 2 | 1 ADD
 3 | 2 DIV_START
 4 | 3 DIV
 5 | 4 ADD_2_START 
 6 | 5 ADD_2
 7 | 6 MUL_START
 8 | 7 MUL
 9 | 8 DONE
10 | 


--------------------------------------------------------------------------------
/guard.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | if [ "$#" -ne 0 ]; then
 3 | 	file=$1;
 4 | 	if [ -f "$file" ]; then
 5 | 		filename=${file##*/};
 6 | 		guard=$(echo __$(echo ${filename^^} | tr . _)__ );
 7 | 		echo -e "\`ifndef $guard\n\`define $guard\n$(cat $file)\n\`endif" > $file;
 8 | 	else
 9 | 		echo "NOT A FILE"
10 | 	fi
11 | else
12 | 	echo "Apply Guard to All Files in Directory? [y/N]"
13 | 	read yes
14 | 	if [ "$yes" = "y" ]; then
15 | 		for file in *.v; do
16 | 			filename=${file##*/};
17 | 			guard=$(echo __$(echo ${filename^^} | tr . _)__ );
18 | 			echo -e "\`ifndef $guard\n\`define $guard\n$(cat $file)\n\`endif" > $file;
19 | 		done
20 | 	fi
21 | fi
22 | 


--------------------------------------------------------------------------------
/input_conditioner.v:
--------------------------------------------------------------------------------
 1 | `ifndef __INPUT_CONDITIONER_V__
 2 | `define __INPUT_CONDITIONER_V__
 3 | //------------------------------------------------------------------------
 4 | // Input Conditioner
 5 | //    1) Synchronizes input to clock domain
 6 | //    2) Debounces input
 7 | //    3) Creates pulses at edge transitions
 8 | //    Taken from Lab 2
 9 | //------------------------------------------------------------------------
10 | 
11 | module input_conditioner
12 | (
13 |     input      clk,           // Clock that samples the input and drives all state
14 |     input      noisysignal,   // Raw input; may be asynchronous and bouncing
15 |     output reg conditioned,   // Synchronized and debounced version of the input
16 |     output reg positiveedge,  // One-clk pulse when conditioned is updated to 1
17 |     output reg negativeedge   // One-clk pulse when conditioned is updated to 0
18 | );
19 | 
20 |     parameter counterwidth = 3; // Width of the debounce counter, >= log2(waittime)
21 |     parameter waittime = 3;     // Counter value at which the new level is accepted
22 | 
23 |     reg [counterwidth-1:0] counter = 0; // Cycles counted while output and input differ
24 |     reg synchronizer0 = 0;              // First synchronizer stage
25 |     reg synchronizer1 = 0;              // Second stage, compared against the output
26 | 
27 |     always @(posedge clk) begin
28 |         // Advance the two-stage synchronizer.
29 |         synchronizer0 <= noisysignal;
30 |         synchronizer1 <= synchronizer0;
31 | 
32 |         // Edge pulses are low unless overridden further down this cycle.
33 |         positiveedge <= 1'b0;
34 |         negativeedge <= 1'b0;
35 | 
36 |         if (conditioned == synchronizer1)
37 |             counter <= 0;                   // Levels agree: restart the count
38 |         else if (counter == waittime) begin
39 |             // The new level has been waited out: adopt it and flag the edge.
40 |             counter      <= 0;
41 |             conditioned  <= synchronizer1;
42 |             positiveedge <= synchronizer1;
43 |             negativeedge <= ~synchronizer1;
44 |         end
45 |         else
46 |             // Still inside the debounce interval.
47 |             counter <= counter + 1'b1;
48 |     end
49 | endmodule
50 | `endif
51 | 


--------------------------------------------------------------------------------
/lerp.v:
--------------------------------------------------------------------------------
 1 | `ifndef __LERP_V__
 2 | `define __LERP_V__
 3 | `include "div_float.v"
 4 | `include "add_float.v"
 5 | `include "mul_float.v"
 6 | 
 7 | module lerp
 8 | #(parameter S=32)
 9 | (
10 | 	input clk,
11 | 	input start,
12 | 	input rst_n,
13 | 	input [31:0] x1,
14 | 	input [31:0] x2,
15 | 	input [31:0] y1,
16 | 	input [31:0] y2,
17 | 	input [31:0] x,
18 | 	output [31:0] y,
19 | 	output done
20 | );
21 | // NOTE(review): this module is unfinished in this file - the last instantiation is cut off mid port list, there is no endmodule, and y / done are never driven.
22 | // x --> (x-x1),(x2-x1),(y2-y1) --> (x-x1)/(x2-x1) -> (x-x1)/(x2-x1)*(y2-y1)
23 | // --> + y1
24 | // Intermediate values, named after the expression they hold ("s" = subtract).
25 | reg [31:0] xsx1;
26 | reg [31:0] x2sx1;
27 | reg [31:0] y2sy1;
28 | reg [31:0] dxx; // (x-x1)/(x2-x1)
29 | 
30 | reg [2:0] sub_done;
31 | reg [1:0] div_done;
32 | // NOTE(review): nan, overflow, underflow, zero and zero_reg are not declared here, so they are implicit 1-bit nets shared by every instance below.
33 | // Phase 1
34 | add_float #(.FLOAT_WIDTH(S)) s1(rst_n, clk, start, 1'b1, x, x1, xsx1, nan, overflow, underflow, zero, sub_done[0]); // presumably 1'b1 selects subtraction - confirm against add_float
35 | add_float #(.FLOAT_WIDTH(S)) s2(rst_n, clk, start, 1'b1, x2, x1, x2sx1, nan, overflow, underflow, zero, sub_done[1]);
36 | add_float #(.FLOAT_WIDTH(S)) s3(rst_n, clk, start, 1'b1, y2, y1, y2sy1, nan, overflow, underflow, zero, sub_done[2]);
37 | // NOTE(review): `DELTA is not defined in this file; it would have to come from one of the includes.
38 | div_float #(.FLOAT_WIDTH(S)) d1(rst_n, clk, start, x1, `DELTA, dxx, zero, nan, overflow, underflow, zero_reg, div_done[0]);
39 | 
40 | // Phase 2
41 | div_float #(.FLOAT_WIDTH(S)) d2(rst_n, clk, &sub_done, xsx1, x2sx1, zero, nan, overflow, underflow, zero_reg, div_done[1]); // starts once all three subtractions report done; NOTE(review): 11 connections here versus 12 on d1
42 | 
43 | mul_float #(.FLOAT_WIDTH(S)) m1(rst_n, clk, div_done[1], x
44 | `endif
45 | 


--------------------------------------------------------------------------------
/matmul.t.v:
--------------------------------------------------------------------------------
  1 | `ifndef __MATMUL_T_V__
  2 | `define __MATMUL_T_V__
  3 | 
  4 | `include "matmul.v"
  5 | 
  6 | `define HEIGHT 4
  7 | `define WIDTH 1
  8 | `define COMMON 2
  9 | 
 10 | 
 11 | module test_matmul();
 12 | // Testbench for matmul: loads matrix A from data/w1.txt, multiplies it by a 2-element vector of 32'h3f800000 and prints the operands and result when `done` rises.
 13 | task print_mat;
 14 | 	parameter height = 1;
 15 | 	parameter width = 1;
 16 | 	input [height*width*32-1:0] mat;
 17 | 	integer i,j;
 18 | 	begin
 19 | 		$display("-----------------");
 20 | 		for(i=0; i<height; i=i+1) begin
 21 | 			for(j=0; j<width; j=j+1) begin
 22 | 				//$write("%H ", `ELEM(mat, i, j, height, width, 32));
 23 | 				$write("%H ", mat[height*width*32-1]); // NOTE(review): prints only the single top bit of mat on every iteration
 24 | 			end
 25 | 			$write("");
 26 | 		end
 27 | 	end
 28 | 
 29 | endtask
 30 | // print_mat is not called anywhere below (its calls are commented out).
 31 | reg rst_n = 1'b0;
 32 | reg clk = 1'b0;
 33 | reg start = 1'b0;
 34 | 
 35 | reg [`HEIGHT*`COMMON*32-1:0] a; // A = H x C
 36 | reg [`COMMON*`WIDTH*32-1:0] b; // B = C x W
 37 | 
 38 | reg [31:0] data [0:7]; //dummy
 39 | 
 40 | wire [`HEIGHT*`WIDTH*32-1:0] o;
 41 | 
 42 | wire done;
 43 | 
 44 | matmul #(.S(32), .W(`WIDTH), .H(`HEIGHT), .C(`COMMON)) m(rst_n, clk, start, a, b, o, done);
 45 | // Free-running clock: toggles every 10 time units.
 46 | always begin
 47 | 	#10
 48 | 	clk = !clk;
 49 | end
 50 | // Print both operands and the product as raw hex each time done rises.
 51 | always @(posedge done) begin
 52 | 	//print_mat (a);
 53 | 	//print_mat(`COMMON, `WIDTH, b);
 54 | 	//print_mat(`HEIGHT, `WIDTH, o);
 55 | 	$display("a");
 56 | 	$display("%H", a);
 57 | 	$display("b");
 58 | 	$display("%H", b);
 59 | 	$display("o");
 60 | 	$display("%H", o);
 61 | end
 62 | 
 63 | initial begin
 64 | 	$dumpfile("matmul.vcd");
 65 | 	$dumpvars(0, test_matmul);
 66 | 
 67 | 	rst_n = 1'b0;
 68 | 	@(negedge clk);
 69 | 
 70 | 	$readmemh("data/w1.txt", data);
 71 | 	assign a = {data[0],data[1],data[2],data[3],data[4],data[5],data[6],data[7]}; // procedural continuous assignment: a keeps tracking data[] and overrides ordinary assignments to a
 72 | 	b = {32'h3f800000, 32'h3f800000};
 73 | 
 74 | 	//a = {32'h3f8e5eea, 32'hbeb0ce44, 32'h3f1ba995, 32'h3f2418fc, 32'hbf364b07, 32'h3f945f07, 32'hbfdc0666, 32'h3ed5be0b, 32'hbeccd3d2, 32'h4011fa6b, 32'hc01163de, 32'h3e668c73 };
 75 | //b = {32'h3f5dc7de, 32'h3fb18dc7, 32'hbf842b78, 32'h3e9dcabb, 32'hbeb7666c, 32'h3e99c756, 32'hbf8e9161, 32'hbea4892c, 32'h3d425861, 32'h3f9f40d8, 32'h3f5847eb, 32'hbfc3d228, 32'h400e584d, 32'h3ec9113c, 32'hbbd2492e, 32'h3ef21373, 32'hbf3ff53d, 32'h3f7b0d8d };
 76 | 
 77 | 	start = 1'b1; // start is high for one clock period while rst_n is still 0
 78 | 	@(negedge clk);
 79 | 	start = 1'b0;
 80 | 	rst_n = 1'b1;
 81 | 	#500; // fixed run time: $finish below fires whether or not done has risen
 82 | 
 83 | 	//rst_n = 1'b0;
 84 | 	//@(negedge clk);
 85 | 
 86 | 	//a = {32'hbf4ac269, 32'h40a00000, 32'h40a00000, 32'h40a00000};
 87 | 	//b = {32'h40a00000, 32'h40a00000, 32'h40a00000, 32'h40a00000};
 88 | 
 89 | 	//start = 1'b1;
 90 | 	//@(negedge clk);
 91 | 	//start = 1'b0;
 92 | 	//rst_n = 1'b1;
 93 | 	//#500;
 94 | 	$finish;
 95 | end
 96 | 
 97 | endmodule
 98 | 
 99 | `endif
100 | 


--------------------------------------------------------------------------------
/matmul.v:
--------------------------------------------------------------------------------
  1 | `ifndef __MATMUL_V__
  2 | `define __MATMUL_V__
  3 | `include "mul_float.v"
  4 | 
  5 | `include "add_float.v"
  6 | 
  7 | // TODO : check valid index
  8 | `define IDX(i,j,h,w) (((h)*(w)-1) - ((i)*(w)+j)) // indexed somewhat unconventionally
  9 | `define ELEM(m,i,j,h,w,s) m[s*(1+`IDX(i,j,h,w))-1:s*`IDX(i,j,h,w)]
 10 | 
 11 | 
 12 | `define S_INIT (2'b00)
 13 | `define S_MUL (2'b01)
 14 | `define S_ADD (2'b10)
 15 | 
 16 | module accumulate
 17 | // accumulate elements of I and put it into O
 18 | //X =  splitting line for recursive accumulation
 19 | #(parameter S=32, C=2, X=2**($clog2(C)-1))
 20 | (
 21 | 	input rst_n,
 22 | 	input clk,
 23 | 	input start,
 24 | 	input [S*C-1:0] I, // input array
 25 | 	output [S-1:0] O, // 1 floating point number
 26 | 	output done
 27 | );
 28 | 
 29 | reg [1:0] stage = 0; //accum --> add
 30 | wire add_start = (stage == 1);
 31 | wire add_rst_n = (stage == 2);
 32 | 
 33 | always @(negedge clk) begin
 34 | 	if(rst_n == 0 | start) begin
 35 | 		stage <= 0;
 36 | 	end 
 37 | end
 38 | 
 39 | always @(posedge clk) begin
 40 | 	case(stage)
 41 | 		0: begin
 42 | 			// accum left, right
 43 | 			if(done_l && done_r) begin
 44 | 				stage <= stage+1;
 45 | 			end
 46 | 		end
 47 | 		1: begin
 48 | 			// add-start
 49 | 			stage <= stage+1;
 50 | 		end
 51 | 		2: begin
 52 | 			if(add_done) begin
 53 | 				stage <= stage+1;
 54 | 			end
 55 | 		end
 56 | 		3: begin
 57 | 
 58 | 		end
 59 | 		default: begin
 60 | 
 61 | 		end
 62 | 	endcase
 63 | 
 64 | end
 65 | 
 66 | wire nan, overflow, underflow, zero; // don't really care for now
 67 | wire done_l, done_r;
 68 | wire add_done;
 69 | 
 70 | if(C == 1) begin
 71 | 	assign done = 1'b1;
 72 | 	// direct assignment
 73 | 	assign O = I;
 74 | end else begin
 75 | 	wire [S-1:0] o_l;
 76 | 	wire [S-1:0] o_r;
 77 | 
 78 | 	accumulate #(.S(S), .C(C-X)) ac_l(rst_n, clk, start, I[S*C-1:S*X], o_l, done_l); // accumulate left side
 79 | 	accumulate #(.S(S), .C(X)) ac_r(rst_n, clk,  start, I[S*X-1:0], o_r, done_r); // accumulate right side
 80 | 	add_float #(.FLOAT_WIDTH(S)) add(add_rst_n, clk, add_start, 1'b0, o_l, o_r, O, nan, overflow, underflow, zero, add_done);
 81 | 	assign done = (stage == 3);
 82 | end
 83 | endmodule
 84 | 
 85 | module matmul // size = 32 bits, width, height, common
 86 | #(parameter S=32, W=2, H=2, C=2)
 87 | (
 88 | 	// H x W
 89 | 	// 2x5 * 5x3 = 2x3
 90 | 	// H*C * C*W = H*W
 91 | 	// row major
 92 | 	input rst_n,
 93 | 		input clk,
 94 | 		input start,
 95 | 
 96 | 		input [S*H*C-1:0] a,
 97 | 		input [S*C*W-1:0] b,
 98 | 		output [S*H*W-1:0] o,
 99 | 		output done
100 | 	);
101 | 
102 | 	reg [2:0] stage = 0;
103 | 
104 | 	wire mul_start = (stage == 0);
105 | 	wire accum_start = (stage == 2);
106 | 	always @(negedge clk) begin
107 | 		if(start)
108 | 			stage = 0;
109 | 	end
110 | 	always @(posedge clk) begin
111 | 		if(start)
112 | 			stage = 0;
113 | 		else begin
114 | 			case(stage)
115 | 				0: begin
116 | 					stage = stage + 1;
117 | 				end
118 | 				1: begin
119 | 					if(&mult_all_done)
120 | 						stage = stage + 1;
121 | 				end
122 | 				2: begin
123 | 					stage = stage + 1;
124 | 				end
125 | 				3: begin
126 | 					if(&add_done)
127 | 						stage = stage + 1;
128 | 				end
129 | 				4: begin
130 | 
131 | 				end
132 | 
133 | 			endcase
134 | 		end
135 | 	end
136 | 
137 | 	wire nan;
138 | 	wire overflow;
139 | 	wire underflow;
140 | 	wire zero;
141 | 
142 | 	wire [H*W-1:0] add_done;
143 | 	wire [H*W-1:0] mult_all_done;
144 | 
145 | 	genvar i,j,k;
146 | 	integer l;
147 | 
148 | 	generate
149 | 
150 | 	for(i=0; i<H; i=i+1) begin: row
151 | 		for(j=0; j<W; j=j+1) begin: col
152 | 			wire [S*C-1:0] o_tmp; // store multiplication results
153 | 			wire [C-1:0] mult_done;
154 | 
155 | 			// multiply
156 | 			for(k=0; k<C; k=k+1) begin : mul
157 | 				mul_float #(.FLOAT_WIDTH(S)) mul(rst_n, clk, start, `ELEM(a,i,k,H,C,S), `ELEM(b,k,j,C,W,S), `ELEM(o_tmp,0,k,1,C,S), nan, overflow, underflow, zero, mult_done[k]);
158 | 				// -->outputs stored to C-length array o_tmp
159 | 				// debugging
160 | 				//always @(o_tmp) begin
161 | 				//	if(i == 0 && j == 1) begin
162 | 					//		$write("(%d, %d) * (%d,%d)", i, k, k, j);
163 | 					//		$write("%H * ", `ELEM(a,i,k,H,C,S));
164 | 					//		$write("%H = ", `ELEM(b,k,j,C,W,S));
165 | 					//		$write("%H", `ELEM(o_tmp,0,k,1,C,S));
166 | 					//	end
167 | 					//end
168 | 				end
169 | 
170 | 				assign mult_all_done[i*W+j] = &mult_done;
171 | 
172 | 				// accumulate
173 | 				accumulate #(.S(32), .C(C)) acc(rst_n, clk, accum_start, o_tmp, `ELEM(o,j,i,W,H,S), add_done[i*W+j]);
174 | 			end
175 | 		end
176 | 
177 | 		endgenerate
178 | 
179 | 		assign done = &add_done; // only done when all elements are completed
180 | 
181 | 		endmodule
182 | 	`endif
183 | 


--------------------------------------------------------------------------------
/mul_float.t.v:
--------------------------------------------------------------------------------
 1 | `ifndef __MUL_FLOAT_T_V__
 2 | `define __MUL_FLOAT_T_V__
 3 | `include "mul_float.v"
 4 | 
 5 | module test_mul();
 6 | 
 7 | reg rst_n;
 8 | reg clk=0;
 9 | reg start;
10 | 
11 | reg [31:0] op1;
12 | reg [31:0] op2;
13 | 
14 | wire [31:0] out;
15 | wire nan;
16 | wire overflow;
17 | wire underflow;
18 | wire zero;
19 | wire done;
20 | 
21 | mul_float #(.FLOAT_WIDTH(32)) dut(
22 | 	//inputs
23 | 	.rst_n(rst_n), 
24 | 	.clk(clk), 
25 | 	.start(start),
26 | 	.op1(op1), 
27 | 	.op2(op2),
28 | 	//outputs
29 | 	.out_reg(out),
30 | 	.nan_reg(nan),
31 | 	.overflow_reg(overflow),
32 | 	.underflow_reg(underflow),
33 | 	.zero_reg(zero),
34 | 	.done_reg(done)
35 | );
36 | 
37 | always begin
38 | 	#10
39 | 	clk = !clk;
40 | end
41 | 
42 | always @(posedge done) begin
43 | 	$display("a b o");
44 | 	$display("%H %H %H", op1, op2, out);
45 | end
46 | 
47 | initial begin
48 | 
49 | 	$dumpfile("mul_float.vcd");
50 |     $dumpvars(0, test_mul);
51 | 
52 | 	rst_n = 1'b0;
53 | 	@(negedge clk);
54 | 	op1 = 32'h40a00000;
55 | 	op2 = 32'h40a00000;
56 | 	start = 1;
57 | 	@(negedge clk);
58 | 	start = 0;
59 | 	rst_n = 1'b1;
60 | 	#500;
61 | 	$finish;
62 | end
63 | 
64 | 
65 | endmodule
66 | `endif
67 | 


--------------------------------------------------------------------------------
/mul_float.v:
--------------------------------------------------------------------------------
  1 | `ifndef __MUL_FLOAT_V__
  2 | `define __MUL_FLOAT_V__
  3 | module mul_float
  4 |   #(parameter
  5 |    FLOAT_WIDTH = 64
  6 |   )
  7 |   (
  8 |   input wire rst_n, clk, start,
  9 |   input wire [FLOAT_WIDTH - 1: 0] op1, op2,
 10 |   output reg [FLOAT_WIDTH - 1: 0] out_reg,
 11 |   output reg nan_reg,
 12 |   output reg overflow_reg,
 13 |   output reg underflow_reg,
 14 |   output reg zero_reg,
 15 |   output reg done_reg
 16 |   );
 17 |   localparam EXP_WIDTH = (FLOAT_WIDTH == 64) ? 11: 8; 
 18 |   localparam FRACTION_WIDTH = (FLOAT_WIDTH == 64) ? 52: 23;
 19 |   localparam FULL_FRACTION_WIDTH = FRACTION_WIDTH + 3;
 20 |   localparam SIGN_BIT =  FLOAT_WIDTH - 1;
 21 |   localparam EXP_MSB = SIGN_BIT - 1;
 22 |   localparam EXP_LSB = EXP_MSB - EXP_WIDTH + 1;
 23 |   localparam EXP_SHIFT = (2 ** (EXP_WIDTH - 1)) - 1;
 24 |   localparam EXP_MAX = (2 ** (EXP_WIDTH)) - 1;
 25 |   localparam FRACTION_MSB = EXP_LSB - 1;
 26 |   localparam NAN_VALUE = (FLOAT_WIDTH == 64) ? 64'h7FF8_0000_0000_0000: 32'hFFC0_0000;
 27 |   localparam INF_VALUE = (FLOAT_WIDTH == 64) ? 64'h7FF0_0000_0000_0000: 32'h7F80_0000;
 28 |   localparam PRODUCT_WIDTH = (FRACTION_WIDTH + 1) * 2;
 29 |   localparam STAGE_REG_WIDTH = 2;
 30 |   localparam MAX_STAGE_REG = 2;
 31 |   
 32 |   wire [EXP_WIDTH - 1: 0] exp1 = op1[EXP_MSB: EXP_LSB],
 33 |     exp2 = op2[EXP_MSB: EXP_LSB];
 34 | 	
 35 |   wire [FRACTION_WIDTH: 0] frac1 = {1'b1, op1[FRACTION_MSB: 0]},
 36 |   frac2 = {1'b1, op2[FRACTION_MSB: 0]};
 37 |   
 38 |   reg [STAGE_REG_WIDTH - 1: 0] stage_reg;
 39 |   wire [STAGE_REG_WIDTH - 1: 0] next_stage = stage_reg + 1;
 40 |   
 41 |   always@(posedge clk or negedge rst_n)
 42 |   begin: stage_inc
 43 |     if(!rst_n)
 44 | 	   stage_reg <= 0;
 45 | 	 else
 46 | 	 begin
 47 | 	   if(start)
 48 | 		  stage_reg <= 0;
 49 | 		else if(stage_reg < MAX_STAGE_REG)
 50 |         stage_reg <= next_stage;
 51 |     end
 52 |   end
 53 | 
 54 |   reg [EXP_WIDTH + 1: 0] full_exp_sum_reg, full_exp_sum_after_correction_reg; //full exp sum has two additional bits
 55 |   reg [PRODUCT_WIDTH - 1: 0] full_frac_reg;
 56 |   reg [FRACTION_WIDTH + 1: 0] frac_res_before_rounding_reg;
 57 |  
 58 |   always@(posedge clk or negedge rst_n)
 59 |   begin
 60 |     if(!rst_n)
 61 | 	 begin
 62 | 
 63 | 	 end
 64 | 	 else
 65 |     begin
 66 | 	   if(stage_reg == 0)
 67 | 		begin
 68 | 	     full_exp_sum_reg <= exp1 + exp2 - EXP_SHIFT;
 69 | 	     full_frac_reg <= frac1 * frac2; 
 70 | 		end
 71 | 		else if(stage_reg == 1)
 72 | 		begin
 73 | 		  //exp correction must be undertaken
 74 | 		  full_exp_sum_after_correction_reg <= full_exp_sum_reg + full_frac_reg[PRODUCT_WIDTH - 1];
 75 | 		  frac_res_before_rounding_reg <= full_frac_reg[PRODUCT_WIDTH - 1]? full_frac_reg[PRODUCT_WIDTH - 1: PRODUCT_WIDTH - FRACTION_WIDTH - 2] : full_frac_reg[PRODUCT_WIDTH - 2: PRODUCT_WIDTH - FRACTION_WIDTH - 3];
 76 | 		end
 77 | 		// it is not the end...
 78 | 	 end
 79 |   end
 80 | 
 81 |   wire [FRACTION_WIDTH + 1: 0] frac_res_after_rounding = frac_res_before_rounding_reg + 1;
 82 |   wire [FRACTION_WIDTH - 1: 0] frac_res = frac_res_after_rounding[FRACTION_WIDTH: 1];
 83 |   wire sign1 = op1[SIGN_BIT],
 84 |     sign2 = op2[SIGN_BIT];
 85 |   wire sign_res = sign1 ^ sign2;
 86 |   wire [EXP_WIDTH - 1: 0] exp_res = full_exp_sum_after_correction_reg[EXP_WIDTH - 1: 0]; 
 87 |   wire 
 88 |   is_zero1 = (op1 & INF_VALUE) == 0,
 89 |   is_zero2 = (op2 & INF_VALUE) == 0,
 90 |   is_nan1 = &exp1 && (op1[FRACTION_WIDTH - 1: 0] != 0),
 91 |   is_nan2 = &exp2 && (op2[FRACTION_WIDTH - 1: 0] != 0),
 92 |   is_inf1 = &exp1 && (op1[FRACTION_WIDTH - 1: 0] == 0),
 93 |   is_inf2 = &exp2 && (op2[FRACTION_WIDTH - 1: 0] == 0),
 94 |   is_inf_result = (full_exp_sum_after_correction_reg[EXP_WIDTH + 1: EXP_WIDTH] == 2'b01) || ((full_exp_sum_after_correction_reg[EXP_WIDTH + 1: EXP_WIDTH] == 2'b00) && (&full_exp_sum_after_correction_reg[EXP_WIDTH - 1: 0])),
 95 |   is_nan_result = is_nan1 || is_nan2 || (is_zero1 && is_inf2) || (is_inf1 && is_zero2),
 96 |   is_overflow_result = is_inf_result && !(is_inf1 || is_inf2) && !is_nan_result,
 97 |   is_underflow_result = (full_exp_sum_after_correction_reg[EXP_WIDTH + 1] || (exp_res == 0)) && !(is_zero1 || is_zero2) && !is_overflow_result && !is_nan_result,
 98 |   is_zero_result = (is_zero1 || is_zero2 || (exp_res == 0) || is_underflow_result) && !is_overflow_result && !is_nan_result;
 99 |   
100 |   always@(posedge clk or negedge rst_n)
101 |   begin: result_out
102 |     if(!rst_n)
103 | 	 begin
104 |       out_reg <= 0;
105 | 	 end
106 | 	 else
107 |     if(stage_reg == 2)
108 |     begin
109 | 	   if(is_nan_result)
110 | 		begin
111 | 		  out_reg <= NAN_VALUE;
112 | 		end
113 | 		else if(is_zero_result)
114 | 		begin
115 | 		  out_reg <= {sign_res, {(FRACTION_WIDTH + EXP_WIDTH){1'b0}}};
116 | 		end
117 | 		else if(is_inf_result)
118 | 		begin
119 | 		  out_reg <= {sign_res,  {EXP_WIDTH{1'b1}}, {FRACTION_WIDTH{1'b0}}};
120 | 		end
121 | 		else
122 | 		begin
123 | 	     out_reg <= {sign_res, exp_res, frac_res};
124 | 		end
125 | 	 end
126 |   end
127 |   
128 |   always@(posedge clk or negedge rst_n)
129 |   begin: done_out
130 |     if(!rst_n)
131 | 	 begin
132 |       done_reg <= 0;
133 | 	 end
134 | 	 else
135 | 	 begin
136 |       done_reg <= stage_reg == MAX_STAGE_REG; //done
137 |     end
138 |   end  
139 |   
140 |   always@(posedge clk or negedge rst_n)
141 |   begin: aux_outs
142 |     if(!rst_n)
143 | 	 begin
144 |       nan_reg <= 0;
145 |       overflow_reg <= 0;
146 |       underflow_reg <= 0;
147 |       zero_reg <= 0;
148 | 	 end
149 | 	 else
150 | 	 begin
151 |       if(stage_reg == 2)
152 | 		begin
153 | 		  nan_reg <= is_nan_result;
154 |         overflow_reg <= is_overflow_result;
155 |         underflow_reg <= is_underflow_result;
156 |         zero_reg <= is_zero_result;
157 | 		end
158 |     end
159 |   end  
160 |   
161 | endmodule
162 | `endif
163 | 


--------------------------------------------------------------------------------
/net.t.v:
--------------------------------------------------------------------------------
 1 | `ifndef __NET_T_V__
 2 | `define __NET_T_V__
 3 | `include "net.v"
 4 | 
 5 | `define INPUT 2
 6 | `define HIDDEN 4
 7 | `define OUTPUT 1
 8 | 
 9 | module test_net();
10 | 
11 | task test_xor;
12 | 	input [31:0] a;
13 | 	input [31:0] b;
14 | 	begin
15 | 		rst_n = 1'b0;
16 | 		#100
17 | 		@(negedge clk);
18 | 		x = {a,b};
19 | 		start = 1'b1;
20 | 		@(negedge clk);
21 | 		rst_n = 1'b1;
22 | 		start = 1'b0;
23 | 		@(posedge done);
24 | 	end
25 | endtask
26 | 
27 | 
28 | reg clk = 0;
29 | always begin
30 | 	#10
31 | 	clk = !clk;
32 | end
33 | 
34 | reg rst_n;
35 | reg start;
36 | 
37 | reg [32*`INPUT*1-1:0] x;
38 | wire [32*`OUTPUT*1-1:0] y;
39 | 
40 | wire [31:0] zero = 32'h00000000;
41 | wire [31:0] one  = 32'h3f800000; 
42 | 
43 | wire done;
44 | 
45 | net #(.I(`INPUT), .O(`OUTPUT), .H(`HIDDEN), .D(1)) n(clk, rst_n, start, x, y, done);
46 | 
47 | always @(posedge done) begin
48 | 	$display("%H ^ %H = %H", x[31:0], x[63:32], y);
49 | end
50 | 
51 | initial begin
52 | 	$dumpfile("net.vcd");
53 | 	$dumpvars(0, test_net);
54 | 	#500;
55 | 	test_xor(zero,zero); // --> 0
56 | 	#500;
57 | 	test_xor(zero,one); // --> 1
58 | 	#500;
59 | 	test_xor(one,zero); // --> 1
60 | 	#500;
61 | 	test_xor(one,one); // --> 0
62 | 	#500;
63 | 	$finish;
64 | end
65 | 
66 | endmodule
67 | 
68 | `endif
69 | 


--------------------------------------------------------------------------------
/net.v:
--------------------------------------------------------------------------------
  1 | `ifndef __NET_V__
  2 | `define __NET_V__
  3 | 
  4 | `include "sigmoid.v"
  5 | `include "matmul.v"
  6 | `include "add_float.v"
  7 | 
  8 | /// vectorized float addition
  9 | 
 10 | module add_float_v
 11 | #(parameter S=32, N=1)
 12 | (
 13 | 	input rst_n,
 14 | 	input clk,
 15 | 	input start,
 16 | 	input [S*N-1:0] a,
 17 | 	input [S*N-1:0] b,
 18 | 	output [S*N-1:0] o,
 19 | 	output done
 20 | );
 21 | 
 22 | wire nan, overflow, underflow, zero; // don't care
 23 | 
 24 | wire [N-1:0] done_elem;
 25 | 
 26 | genvar i;
 27 | generate
 28 | for(i=0;i<N;i=i+1) begin: add_elem
 29 | 	add_float #(.FLOAT_WIDTH(S)) add(rst_n, clk, start, 1'b0, a[(i+1)*S-1: i*S], b[(i+1)*S-1:i*S], o[(i+1)*S-1:i*S], nan, overflow, underflow, zero, done_elem[i]);
 30 | end
 31 | endgenerate
 32 | 
 33 | assign done = &done_elem; // done only when all done
 34 | 
 35 | endmodule
 36 | 
 37 | 
 38 | module layer
 39 | #(parameter S=32, I=1, O=1)
 40 | (
 41 | 	input clk,
 42 | 	input rst_n,
 43 | 	input start,
 44 | 	input [I*S-1:0] x,
 45 | 	output [O*S-1:0] y,
 46 | 	output done
 47 | );
 48 | 
 49 | reg [2:0] stage = 0;
 50 | 
 51 | reg [S-1:0] _W[0:O*I-1];
 52 | reg [S-1:0] _b[0:O-1];
 53 | 
 54 | wire [S*O*I-1:0] W;
 55 | wire [S*O-1:0] b;
 56 | 
 57 | // unpack
 58 | genvar i,o;
 59 | generate
 60 | for(o=0; o<O; o=o+1) begin
 61 | 	assign `ELEM(b,o,0,O,1,S) = _b[o];
 62 | 	//assign b[S*(o+1)-1:S*o] = _b[O-1-o]; // load backwards.
 63 | 	for(i=0; i<I; i=i+1) begin
 64 | 		assign `ELEM(W,o,i,O,I,S) = _W[o*I+i]; //m,i,j,h,w,s
 65 | 	end
 66 | end
 67 | 
 68 | 
 69 | endgenerate
 70 | 
 71 | wire [S*O-1:0] o_1;
 72 | wire [S*O-1:0] o_2;
 73 | 
 74 | wire mul_start;
 75 | wire add_start;
 76 | wire sig_start;
 77 | 
 78 | wire mul_done;
 79 | wire add_done;
 80 | wire sig_done;
 81 | 
 82 | assign mul_start = (stage == 0);
 83 | assign add_start = (stage == 2);
 84 | assign sig_start = (stage == 4);
 85 | 
 86 | wire mul_rst_n = rst_n;
 87 | wire add_rst_n = (stage != 2);
 88 | wire sig_rst_n = (stage != 4);
 89 | 
 90 | assign done = (stage == 6);
 91 | 
 92 | always @(negedge clk) begin
 93 | 	if(start) begin
 94 | 		stage = 0;
 95 | 	end 
 96 | end
 97 | 
 98 | always @(posedge clk) begin
 99 | 	case(stage)
100 | 		0: begin
101 | 			stage = 1;
102 | 		end
103 | 		1: begin
104 | 			if(mul_done) begin
105 | 				stage = stage + 1;
106 | 			end
107 | 		end
108 | 		2: begin
109 | 			//$display("I : %H", x);
110 | 			//$display("O : %H", o_1);
111 | 			stage = 3;
112 | 		end
113 | 		3: begin
114 | 			if(add_done) begin
115 | 				//$display("B : %H", b);
116 | 				//$display("O_2 : %H", o_2);
117 | 				stage = stage + 1;
118 | 			end
119 | 		end
120 | 		4: begin
121 | 			stage = 5;
122 | 		end
123 | 		5: begin
124 | 			if(sig_done) begin
125 | 				//$display(".>Y : %H", y);
126 | 				stage = stage + 1;
127 | 			end
128 | 		end
129 | 		6: begin
130 | 			//stay at 6
131 | 		end
132 | 		default: begin
133 | 
134 | 		end
135 | 	endcase
136 | end
137 | 
138 | matmul #(.S(S), .W(1), .H(O), .C(I)) m(mul_rst_n, clk, mul_start, W, x, o_1, mul_done);
139 | add_float_v #(.S(S), .N(O)) add(add_rst_n, clk, add_start, o_1, b, o_2, add_done); // o_1 -(+b)-> o_2
140 | sigmoid #(.S(S), .N(O)) sig(clk, sig_rst_n, sig_start, o_2,  y, sig_done); //o_2 -(sig())-> y
141 | 
142 | endmodule
143 | 
144 | 
// Two-layer feed-forward network: x -> layer l_1 (I -> H) -> layer l_2
// (H -> O) -> y, using 32-bit floating-point words.
// Weights and biases are read from data/w1.txt, data/b1.txt, data/w2.txt
// and data/b2.txt at time zero.
// NOTE(review): the `rst_n` input and the `D` parameter are not used in
// this module.
module net
#(
	parameter I = 784,
	parameter O = 10,
	parameter H = 75,
	parameter D = 1 // depth of array
)
(
	input clk,
	input rst_n,
	input start,
	input [I*32-1:0] x,
	output [O*32-1:0] y,
	output done
);

localparam S = 32;

// stage: 0 = start layer 1, 1 = wait layer 1,
//        2 = start layer 2, 3 = wait layer 2, 4 = finished
reg [2:0] stage = 0;

// Control nets are declared BEFORE the instances that connect to them.
// Referencing them first in a port list creates implicit nets that the later
// `wire` declarations would then redeclare.
wire start_1 = (stage == 0);
wire start_2 = (stage == 2);

// each layer is held in reset during its own start cycle
wire l_1_rst_n = !start_1;
wire l_2_rst_n = !start_2;

wire [H*32-1:0] o_1; // intermediate unit for hidden layer
wire done_1, done_2;

layer #(.S(S), .I(I), .O(H)) l_1(clk, l_1_rst_n, start_1, x, o_1, done_1);
layer #(.S(S), .I(H), .O(O)) l_2(clk, l_2_rst_n, start_2, o_1, y, done_2);

// Load the trained parameters straight into the layers' memories.
initial begin
	$readmemh("data/w1.txt", l_1._W);
	$readmemh("data/b1.txt", l_1._b);
	$readmemh("data/w2.txt", l_2._W);
	$readmemh("data/b2.txt", l_2._b);
end

assign done = (stage == 4);

always @(posedge clk) begin
	if(start) begin
		stage = 0;
	end else begin
		case(stage)
			0: begin
				stage = stage + 1;
			end
			1: begin
				if(done_1)
					stage = stage + 1;
			end
			2: begin
				stage = stage + 1;
			end
			3: begin
				if(done_2)
					stage = stage + 1;
			end
			4: begin
				// hold the result until the next start
			end
			default: begin

			end
		endcase
	end
end

endmodule
214 | 
215 | //genvar i;
216 | //generate
217 | //
218 | //for(i=0; i<D;i=i+1) begin
219 | //	initial begin
220 | //		// initialize weight elem.
221 | //		matmul #(.S(32) .W()m(rst_n, clk, start_1, w_1, x, o_1, done_1); // matrix mult.
222 | //		sigmoid s(clk, rst_n, o_1, start_2, o_2, done_2); // activation
223 | //	end
224 | //end
225 | //if(i == 0) begin
226 | 	//	matmul m(rst_n, clk, start_1, w_1, x, o_1, done_1); // matrix mult.
227 | 	//	sigmoid s(clk, rst_n, o_1, start_2, o_2, done_2); // activation
228 | 	//end
229 | 	//// (... add bias)
230 | 	//endgenerate
231 | 	//
232 | 	//initial begin
233 | 	//	// $loadmemh(w_1, "");
234 | 	//	// load weights and biases
235 | 	//end
236 | 
237 | 
238 | `endif
239 | 


--------------------------------------------------------------------------------
/net_wrapper.t.v:
--------------------------------------------------------------------------------
 1 | `include "net_wrapper.v"
 2 | 
 3 | module test_net_wrapper();
 4 | 
 5 | reg clk = 0;
 6 | reg [3:0] sw;
 7 | reg [3:0] btn;
 8 | wire [3:0] led;
 9 | 
10 | integer i;
11 | 
12 | net_wrapper wrap(clk, sw, btn, led);
13 | 
14 | always begin
15 | 	#10;
16 | 	clk=!clk;
17 | end
18 | 
19 | initial begin
20 | 	#500;
21 | 	sw[0] = 1;
22 | 	sw[1] = 0;
23 | 	for(i=0; i<5; i=i+1) begin
24 | 		btn[0] = 1;
25 | 		@(negedge clk);
26 | 	end
27 | 	btn[0] = 0;
28 | 	@(posedge led[2]); // wait until led comes up... (indicating "done")
29 | 	$display("%b %b %d", led[0], led[1], $time);
30 | 	$finish;
31 | end
32 | 
33 | endmodule
34 | 


--------------------------------------------------------------------------------
/net_wrapper.v:
--------------------------------------------------------------------------------
 1 | `timescale 1ns / 1ps
 2 | 
 3 | `include "net.v"
 4 | `include "comp_float.v"
 5 | `include "input_conditioner.v"
 6 | 
 7 | module net_wrapper 
 8 | (
 9 |     input        clk,
10 |     input  [3:0] sw,
11 |     input  [3:0] btn,
12 |     output [3:0] led
13 | );
14 | 
15 | // sw[0],sw[1] are inputs to the XOR Classifier
16 | // btn[0] triggers the computation
17 | // led[0] indicates the output
18 | // led[1] indicates true XOR output
19 | // led[2] indicates that the calculation is done
20 | 
21 | reg [2:0] stage = 0; // idle, reset+load, start, progress
22 | 
23 | wire [31:0] one = 32'h3f800000; 
24 | wire [31:0] zero = 32'h00000000; 
25 | wire [31:0] half = 32'h3f000000;
26 | 
27 | wire [31:0] a = sw[0]? one:zero;
28 | wire [31:0] b = sw[1]? one:zero;
29 | wire [63:0] x = {a,b};
30 | wire [31:0] y;
31 | 
32 | wire [2:0] flag;
33 | wire rst_n = (stage != 1);
34 | wire start = (stage == 1);
35 | wire done;
36 | 
37 | wire btn_cnd; // conditioned button input
38 | wire btn_posedge;
39 | wire btn_negedge;
40 | 
41 | input_conditioner ic(clk, btn[0], btn_cnd, btn_posedge, btn_negedge);
42 | net #(.I(2), .O(1), .H(4), .D(1)) n(clk, rst_n, start, x, y, done);
43 | comp_float cmp(flag,y,half); // compare against 0.5
44 | 
45 | always @(posedge clk) begin
46 | 	case (stage)
47 | 		0: begin //idle
48 | 			if(btn_negedge)
49 | 				stage <= 1; // start
50 | 		end
51 | 		1: begin
52 | 			stage <= 2;
53 | 		end
54 | 		2: begin
55 | 			stage <= 3;
56 | 		end
57 | 		3: begin
58 | 			if(done)
59 | 				stage <= 4; // back to idle
60 | 		end
61 | 		4: begin
62 | 
63 | 		end
64 | 	endcase
65 | 
66 | end
67 | 
68 | // outputs
69 | assign led[0] = flag[2];
70 | assign led[1] = sw[0] ^ sw[1];
71 | assign led[2] = done;
72 | //assign led[3:1] = n.stage;
73 | 
74 | endmodule
75 | 


--------------------------------------------------------------------------------
/polyfit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import numpy as np
 4 | from matplotlib import pyplot as plt
 5 | 
 6 | def test(x):
 7 |     print '{} : {} sec.'.format(x, f(x))
 8 | 
 9 | x = [10,20,30,40,50,60,100,150,300, 400, 784] # number of inputs
10 | y = [.43,.86,1.37,2.00,2.64,3.38,7.69,16.68,90.73, 165, 678.52] # time taken
11 | 
12 | p = np.polyfit(x,y,2)
13 | print p
14 | f = np.poly1d(p)
15 | 
16 | xs = range(784)
17 | plt.plot(x, y, 'o')
18 | plt.plot(xs, f(xs))
19 | plt.legend(['measured time','deduced time'])
20 | plt.show()
21 | 
22 | test(784)
23 | 


--------------------------------------------------------------------------------
/sigmoid.t.v:
--------------------------------------------------------------------------------
 1 | `ifndef __SIGMOID_T_V__
 2 | `define __SIGMOID_T_V__
 3 | `include "sigmoid.v"
 4 | 
 5 | `define NUM 2
 6 | module test_sigmoid();
 7 | 
 8 | reg clk=0;
 9 | reg rst_n;
10 | reg start;
11 | reg [32*`NUM-1:0] x;
12 | wire [32*`NUM-1:0] y;
13 | wire done;
14 | 
15 | sigmoid #(.S(32), .N(`NUM)) s(clk,rst_n,start,x,y,done);
16 | 
17 | always begin
18 | 	#10
19 | 	clk = !clk;
20 | end
21 | 
22 | always @(posedge done) begin
23 | 	$display("%H --> %H", x, y);
24 | end
25 | 
26 | initial begin
27 | 	$dumpfile("sigmoid.vcd");
28 | 	$dumpvars(0, test_sigmoid);
29 | 
30 | 	rst_n = 1'b0;
31 | 	@(negedge clk);
32 | 	x = {32'hc0733333,32'h40a00000};
33 | 	start = 1'b1;
34 | 	@(negedge clk);
35 | 	start = 1'b0;
36 | 	rst_n = 1'b1;
37 | 	@(posedge done);
38 | 	#100;
39 | 
40 | 	rst_n = 1'b0;
41 | 	@(negedge clk);
42 | 	x = {32'h0, 32'h40a00000};
43 | 	start = 1'b1;
44 | 	@(negedge clk);
45 | 	start = 1'b0;
46 | 	rst_n = 1'b1;
47 | 	@(posedge done);
48 | 	#100;
49 | 
50 | 	$finish;
51 | end
52 | 
53 | endmodule
54 | `endif
55 | 


--------------------------------------------------------------------------------
/sigmoid.v:
--------------------------------------------------------------------------------
  1 | `ifndef __SIGMOID_V__
  2 | `define __SIGMOID_V__
  3 | 
  4 | `include "div_float.v"
  5 | `include "mul_float.v"
  6 | `include "add_float.v"
  7 | `define GET(v,e,s) v[(e+1)*s-1:e*s]
  8 | 
  9 | module sigmoid
 10 | #(parameter S=32, parameter N=2)
 11 | (
 12 | 	input clk,
 13 | 	input rst_n,
 14 | 	input start,
 15 | 	input [S*N-1:0] x,
 16 | 	output [S*N-1:0] y,
 17 | 	output done
 18 | );
 19 | 
 20 | // implements fast sigmoid, x / (1 + abs(x))
 21 | 
 22 | // x -> abs(x) -> 1.0 + % -> x/% -> 1 + % -> 0.5 * %
 23 | reg [3:0] stage = 0; // up to 8
 24 | wire [31:0] one = 32'h3f800000;
 25 | wire [31:0] half = 32'h3f000000;
 26 | 
 27 | wire [S*N-1:0] opax; // one plus abs x
 28 | wire [S*N-1:0] xdo; // x div. opax
 29 | wire [S*N-1:0] hpx; // half plus xdo
 30 | 
 31 | wire [3:0] stage_done;
 32 | 
 33 | wire add_start = (stage == 0);
 34 | wire div_start = (stage == 2);
 35 | wire add_start_2 = (stage == 4);
 36 | wire mul_start = (stage == 6);
 37 | 
 38 | wire add_rst_n = (stage != 0);
 39 | wire div_rst_n = (stage != 2); //negedge right before stage == 2
 40 | wire add_rst_n_2 = (stage != 4);
 41 | wire mul_rst_n = (stage != 6);
 42 | 
 43 | wire nan, zero, overflow, underflow,  divzero;
 44 | 
 45 | assign done = (stage == 8);
 46 | 
 47 | always @(negedge clk) begin
 48 | 	if(start)
 49 | 		stage = 0;
 50 | end
 51 | 
 52 | always @(posedge clk) begin
 53 | 	if(start)
 54 | 		stage = 0;
 55 | 	else begin
 56 | 		case(stage)
 57 | 			0: begin
 58 | 				if(!start)
 59 | 					stage = stage + 1;
 60 | 			end
 61 | 			1: begin
 62 | 				if(stage_done[0]) begin
 63 | 					stage = stage + 1;	
 64 | 				end
 65 | 			end
 66 | 			2: begin
 67 | 				stage = stage + 1;
 68 | 			end
 69 | 			3: begin
 70 | 				if(stage_done[1]) begin
 71 | 					stage = stage+1;
 72 | 				end
 73 | 			end
 74 | 			4: begin
 75 | 				stage = stage + 1;
 76 | 			end
 77 | 			5: begin
 78 | 				if(stage_done[2]) begin
 79 | 					stage = stage + 1;
 80 | 				end
 81 | 			end
 82 | 			6: begin
 83 | 				stage = stage + 1;
 84 | 			end
 85 | 			7: begin
 86 | 				if(stage_done[3]) begin
 87 | 					stage = stage + 1;
 88 | 				end
 89 | 			end
 90 | 			8: begin
 91 | 
 92 | 			end
 93 | 			default: begin
 94 | 
 95 | 			end
 96 | 		endcase
 97 | 	end
 98 | end
 99 | 
100 | // TODO : change start/done signals
101 | generate
102 | genvar i;
103 | for(i=0; i<N; i=i+1) begin : each
104 | 	wire [S-1:0] absx = {1'b0, x[S*(i+1)-2:S*i]};
105 | 	add_float #(.FLOAT_WIDTH(S)) a1(add_rst_n, clk, add_start, 1'b0, absx, one, `GET(opax,i,S), nan, overflow, underflow, zero, stage_done[0]); // abs(x) + 1
106 | 	div_float #(.FLOAT_WIDTH(S)) d1(div_rst_n, clk, div_start, `GET(x,i,S), `GET(opax,i,S), `GET(xdo,i,S), divzero, nan, overflow, underflow, zero, stage_done[1]); // x / (abs(x)+1)
107 | 	add_float #(.FLOAT_WIDTH(S)) a2(add_rst_n_2, clk, add_start_2, 1'b0, `GET(xdo,i,S), one, `GET(hpx,i,S), nan, overflow, underflow, zero, stage_done[2]);
108 | 	mul_float #(.FLOAT_WIDTH(S)) mul(mul_rst_n, clk, mul_start, `GET(hpx,i,S), half, `GET(y,i,S), nan, overflow, underflow, zero, stage_done[3]);
109 | end
110 | endgenerate
111 | 
112 | endmodule
113 | `endif
114 | 


--------------------------------------------------------------------------------
/x.tcl:
--------------------------------------------------------------------------------
# Report the listed DRC checks as warnings.
foreach drc_id {NSTD-1 UCIO-1} {
    set_property SEVERITY {Warning} [get_drc_checks $drc_id]
}
3 | 


--------------------------------------------------------------------------------