├── Agenda
    └── Agenda_10815
├── LICENSE
├── README.md
├── gen
    ├── Makefile
    ├── README
    ├── afu_csr.h
    ├── afu_csr.spec
    ├── afu_csr.sv
    └── afu_csr.vh
├── rtl
    ├── cacheline_buffer.sv
    ├── conv_forward_layer.sv
    ├── conv_forward_layer_tb.sv
    ├── inner_product_backward.sv
    ├── inner_product_backward_tb.sv
    ├── inner_product_forward.sv
    ├── inner_product_forward_tb.sv
    ├── loss_layer_tb.sv
    ├── loss_opt.sv
    ├── pooling_backward_layer_tb.sv
    ├── pooling_backward_opt.sv
    ├── qa_conv.sv
    ├── qip
    │   ├── float_add.bsf
    │   ├── float_add.cmp
    │   ├── float_add.inc
    │   ├── float_add.qip
    │   ├── float_add.v
    │   ├── float_add_bb.v
    │   ├── float_add_inst.v
    │   ├── float_add_syn.v
    │   ├── float_mult.bsf
    │   ├── float_mult.cmp
    │   ├── float_mult.inc
    │   ├── float_mult.qip
    │   ├── float_mult.v
    │   ├── float_mult_bb.v
    │   ├── float_mult_inst.v
    │   ├── float_mult_syn.v
    │   ├── iplauncher_debug.log
    │   ├── ram_2p.qip
    │   ├── ram_2p.v
    │   └── ram_2p_bb.v
    ├── relu_backward_layer.sv
    ├── relu_backward_layer.sv.bak
    ├── relu_backward_layer_tb.sv
    ├── relu_backward_layer_tb.sv.bak
    ├── relu_backward_opt.sv
    ├── relu_backward_opt_tb.sv
    ├── relu_forward.sv
    └── relu_forward_tb.sv
├── test
    ├── conv_forward_tests_header.py
    ├── pooling_backward_tests_header.py
    ├── pooling_forward_tests_header.py
    ├── relu_backward_tests_header.py
    ├── relu_forward_tests_header.py
    ├── softmax_with_loss_tests_header.py
    └── test_data
    │   ├── conv_forward_test_data.vh
    │   ├── inner_product_backward_test_data.vh
    │   ├── ip_backward_test_data.vh
    │   ├── ip_forward_test_data.vh
    │   ├── pooling_backward_test_data.vh
    │   ├── pooling_forward_test_data.vh
    │   ├── relu_backward_test_data.vh
    │   ├── relu_forward_test_data.vh
    │   └── softmax_with_loss_test_data.vh
└── tools
    ├── caffe_install_deps.sh
    └── nvidia_smi_command.sh


/Agenda/Agenda_10815:
--------------------------------------------------------------------------------
 1 | Agenda for team meeting (Oct 8, 2015)
 2 | 
 3 |   Meta Data Management System
 4 |     GitHub Repo:  Use for SCV
 5 |       - To be setup
 6 |     git config --global user.name "Your Name"
 7 |     git config --global user.email you@example.com
 8 | 
 9 |     Use Gist for code snippets
10 | 
11 |     JIRA:  Use for workflow control
12 |       - Can be linked with GitHub
13 |       - Brian will host
14 |       - To be config
15 |       - Ultra-DNS service
16 | 
17 |     Confluence/GitWiki:  Use for Wiki [Doc]
18 |       - Which one to use?
19 |       - Use this as a doc for our proj
20 | 
21 |     Slack
22 |       - Done
23 |       - Thank Brian
24 | 
25 |   GoogleSite Blog
26 |     Make sure you can edit it
27 |     User-friendly Proposal 
28 |       - To be re-written
29 |  
30 |   Engineering Requirements
31 |     How are we doing it?
32 |       - The Stakeholder has no list
33 |           - Translate Stk paragraph to bullet?
34 |           - Use it as a baseline? 
35 |       - We each should indep come up w/ 10 req
36 | 
37 |   
38 | 
39 |       
40 |     
41 |     
42 |   
43 | 
44 |     
45 |   
46 |   
47 | 
48 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Brian Hill
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Synopsis
 2 | 
 3 | The objective of this project is to showcase the increased performance and efficiency of using custom computing rather than a fixed processor architecture by bench-marking the FPGA implementation of deep learning kernels against the GPU implementation. Code created during this project will be given to the open source community to increase awareness of configurable hardware and demonstrate the utility of FPGAs for deep learning. In doing so, students will become more familiar with programming for an FPGA as well as applying deep learning in a real-world application: diabetic retinopathy. The project will also showcase the power of using deep CNNs for computer vision by creating a binary classification system for breast invasive carcinoma tissue images.
 4 | 
 5 | ## Code Example
 6 | 
 7 | Show what the library does as concisely as possible, developers should be able to figure out **how** your project solves their problem by looking at the code example. Make sure the API you are showing off is obvious, and that your code is short and concise.
 8 | 
 9 | ## Motivation
10 | 
11 | We propose a scalable, energy-efficient approach to deep learning that helps clinicians reach a diagnosis more accurately and in less time. We will apply it to grading severity in diabetic retinopathy images and, in the field of cancer, to classifying breast invasive carcinoma tissue by tumor grade or absence of tumor. <br  /> <br  />
12 | 
13 | The implementation starts from the most widely used open source deep learning package. Three of the most popular packages are Theano, Torch, and Caffe; we will select the one most widely used by the deep learning community, rewrite its kernels for a field-programmable gate array (FPGA), and benchmark the result against other implementations (GPU) in an energy efficiency study. All of the code will be shared with the open source community.
14 | 
15 | 
16 | 
17 | 
18 | ## Installation
19 | 
20 | Provide code examples and explanations of how to get the project.
21 | 
22 | ## API Reference
23 | 
24 | Depending on the size of the project, if it is small and simple enough the reference docs can be added to the README. For medium size to larger projects it is important to at least provide a link to where the API reference docs live.
25 | 
26 | ## Tests
27 | 
28 | Describe and show how to run the tests with code examples.
29 | 
30 | ## Contributors
31 | 
32 | - Brian Hill
33 | - Sophia Zhang
34 | 
35 | ## License
36 | 
37 | This project is released under the MIT License; see the LICENSE file for the full text.
38 | 


--------------------------------------------------------------------------------
/gen/Makefile:
--------------------------------------------------------------------------------
1 | # Regenerates the AFU CSR sources from afu_csr.spec (presumably afu_csr.sv/.vh/.h - confirm against the generator).
2 | GEN_TOOL = /opt/python-2.7/bin/python2.7 ../../tools/afu_csr_gen.py
3 | 
4 | afu_csr.sv: afu_csr.spec
5 | 	$(GEN_TOOL)
6 | 
7 | # clean is a command, not a file: without .PHONY a file named "clean" would disable it.
8 | .PHONY: clean
9 | clean:
10 | 	rm -f *.h *.vh *.sv


--------------------------------------------------------------------------------
/gen/README:
--------------------------------------------------------------------------------
 1 | * Automatically generate RTL to implement and C code to access AFU control registers.
 2 | * Each register is mapped to its own 32-bit-aligned address
 3 | * 64-bit registers are supported (they occupy two consecutive 32-bit words, low then high)
 4 | * Each register can have a unique reset wire
 5 | 
 6 | 
 7 | 1. Create a spec file (afu_csr.spec) in the following format:
 8 | 
 9 | # comment
10 | register name, number of bits [, reset signal name]
11 | 
12 | 
13 | 2. Generate code from afu_csr.spec file:
14 | 
15 | make
16 | 


--------------------------------------------------------------------------------
/gen/afu_csr.h:
--------------------------------------------------------------------------------
 1 | // Code generated by afu_csr_gen
 2 | //
 3 | // Address of every AFU control register, for use from C.
 4 | // Addresses advance by 4 after registers of up to 32 bits and by 8 after
 5 | // the 64-bit *_BASE registers; for those, the value listed here is the
 6 | // address of the low 32-bit word (it matches ADDR_*_BASEL in afu_csr.vh).
 7 | // Generated file: change afu_csr.spec and re-run make instead of editing by hand.
 8 | 
 9 | #define CSR_AFU_DSM_BASE             0x8a00
10 | #define CSR_AFU_CNTXT_BASE           0x8a08
11 | #define CSR_AFU_EN                   0x8a10
12 | #define CSR_DOORBELL                 0x8a14
13 | #define CSR_READ_BUFFER_LINES        0x8a18
14 | #define CSR_READ_BUFFER_BASE         0x8a1c
15 | #define CSR_WRITE_BUFFER_BASE        0x8a24
16 | #define CSR_UPDATE_DSM               0x8a2c
17 | #define CSR_PLL_RESET                0x8a30
18 | #define CSR_LOAD_WEIGHTS             0x8a34
19 | #define CSR_NUM_CL_PER_FILTER        0x8a38
20 | #define CSR_NUM_FILTERS              0x8a3c
21 | #define CSR_MAX_WEIGHT_BUFFER_ADDR   0x8a40
22 | #define CSR_LOAD_IMAGES              0x8a44
23 | #define CSR_WRITE_FENCE              0x8a48
24 | 


--------------------------------------------------------------------------------
/gen/afu_csr.spec:
--------------------------------------------------------------------------------
 1 | # afu csrs
 2 | # register name, bits [, reset]   (reset: name of a clear signal for the register, or DEFAULT to clear it on the global resetb)
 3 | 
 4 | afu_en, 1
 5 | doorbell, 32, reset_doorbell
 6 | 
 7 | read_buffer_lines, 32
 8 | read_buffer_base, 64
 9 | write_buffer_base, 64
10 | 
11 | update_dsm, 32, reset_update_dsm
12 | 
13 | pll_reset, 1
14 | load_weights, 1
15 | num_cl_per_filter, 8
16 | num_filters, 16
17 | max_weight_buffer_addr, 16
18 | load_images, 1
19 | 
20 | write_fence, 1, DEFAULT
21 | 


--------------------------------------------------------------------------------
/gen/afu_csr.sv:
--------------------------------------------------------------------------------
  1 | // Code generated by afu_csr_gen
  2 | //
  3 | // Generated file: change afu_csr.spec and re-run make instead of editing by hand.
  4 | `include "spl.vh"
  5 | `include "afu.vh"
  6 | `include "afu_csr.vh"
  7 | // CSR write decoder: a register word is loaded from spl_bus.rw_rsp.data on the clock edge where cfg_valid is high and {header[13:0], 2'b0} equals that word's ADDR_* constant.
  8 | module afu_csr
  9 |    (
 10 |     input logic clk,
 11 |     input logic resetb,   // active low: tested as ~resetb below
 12 |     spl_bus_t spl_bus,
 13 |     afu_bus_t afu_bus
 14 |     );
 15 |    // afu_dsm_base[31:0]: no reset.
 16 |    always_ff @(posedge clk) begin
 17 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_DSM_BASEL) begin
 18 |          afu_bus.csr.afu_dsm_base[31:0] <= spl_bus.rw_rsp.data[31:0];
 19 |       end
 20 |    end
 21 |    // afu_dsm_base_valid: cleared while resetb is low, set by a write to the low word.
 22 |    always_ff @(posedge clk) begin
 23 |       if (~resetb) begin
 24 |          afu_bus.csr.afu_dsm_base_valid <= 0;
 25 |       end else if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_DSM_BASEL) begin
 26 |          afu_bus.csr.afu_dsm_base_valid <= 1;
 27 |       end
 28 |    end
 29 |    // afu_dsm_base[63:32]: no reset.
 30 |    always_ff @(posedge clk) begin
 31 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_DSM_BASEH) begin
 32 |          afu_bus.csr.afu_dsm_base[63:32] <= spl_bus.rw_rsp.data[31:0];
 33 |       end
 34 |    end
 35 |    // afu_cntxt_base[31:0]: no reset.
 36 |    always_ff @(posedge clk) begin
 37 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_CNTXT_BASEL) begin
 38 |          afu_bus.csr.afu_cntxt_base[31:0] <= spl_bus.rw_rsp.data[31:0];
 39 |       end
 40 |    end
 41 |    // afu_cntxt_base_valid: cleared while resetb is low, set by a write to the low word.
 42 |    always_ff @(posedge clk) begin
 43 |       if (~resetb) begin
 44 |          afu_bus.csr.afu_cntxt_base_valid <= 0;
 45 |       end else if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_CNTXT_BASEL) begin
 46 |          afu_bus.csr.afu_cntxt_base_valid <= 1;
 47 |       end
 48 |    end
 49 |    // afu_cntxt_base[63:32]: no reset.
 50 |    always_ff @(posedge clk) begin
 51 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_CNTXT_BASEH) begin
 52 |          afu_bus.csr.afu_cntxt_base[63:32] <= spl_bus.rw_rsp.data[31:0];
 53 |       end
 54 |    end
 55 |    // afu_en: bit 0 of the write data; no reset.
 56 |    always_ff @(posedge clk) begin
 57 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_AFU_EN) begin
 58 |          afu_bus.csr.afu_en <= spl_bus.rw_rsp.data[0];
 59 |       end
 60 |    end
 61 |    // doorbell: cleared while afu_bus.csr.reset_doorbell is high, which takes priority over a write.
 62 |    always_ff @(posedge clk) begin
 63 |       if (afu_bus.csr.reset_doorbell) begin
 64 |          afu_bus.csr.doorbell <= 0;
 65 |       end else if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_DOORBELL) begin
 66 |          afu_bus.csr.doorbell <= spl_bus.rw_rsp.data[31:0];
 67 |       end
 68 |    end
 69 |    // read_buffer_lines: no reset.
 70 |    always_ff @(posedge clk) begin
 71 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_READ_BUFFER_LINES) begin
 72 |          afu_bus.csr.read_buffer_lines <= spl_bus.rw_rsp.data[31:0];
 73 |       end
 74 |    end
 75 |    // read_buffer_base[31:0]: no reset.
 76 |    always_ff @(posedge clk) begin
 77 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_READ_BUFFER_BASEL) begin
 78 |          afu_bus.csr.read_buffer_base[31:0] <= spl_bus.rw_rsp.data[31:0];
 79 |       end
 80 |    end
 81 |    // read_buffer_base[63:32]: no reset.
 82 |    always_ff @(posedge clk) begin
 83 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_READ_BUFFER_BASEH) begin
 84 |          afu_bus.csr.read_buffer_base[63:32] <= spl_bus.rw_rsp.data[31:0];
 85 |       end
 86 |    end
 87 |    // write_buffer_base[31:0]: no reset.
 88 |    always_ff @(posedge clk) begin
 89 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_WRITE_BUFFER_BASEL) begin
 90 |          afu_bus.csr.write_buffer_base[31:0] <= spl_bus.rw_rsp.data[31:0];
 91 |       end
 92 |    end
 93 |    // write_buffer_base[63:32]: no reset.
 94 |    always_ff @(posedge clk) begin
 95 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_WRITE_BUFFER_BASEH) begin
 96 |          afu_bus.csr.write_buffer_base[63:32] <= spl_bus.rw_rsp.data[31:0];
 97 |       end
 98 |    end
 99 |    // update_dsm: cleared while afu_bus.csr.reset_update_dsm is high, which takes priority over a write.
100 |    always_ff @(posedge clk) begin
101 |       if (afu_bus.csr.reset_update_dsm) begin
102 |          afu_bus.csr.update_dsm <= 0;
103 |       end else if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_UPDATE_DSM) begin
104 |          afu_bus.csr.update_dsm <= spl_bus.rw_rsp.data[31:0];
105 |       end
106 |    end
107 |    // pll_reset: bit 0 of the write data; no reset.
108 |    always_ff @(posedge clk) begin
109 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_PLL_RESET) begin
110 |          afu_bus.csr.pll_reset <= spl_bus.rw_rsp.data[0];
111 |       end
112 |    end
113 |    // load_weights: bit 0 of the write data; no reset.
114 |    always_ff @(posedge clk) begin
115 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_LOAD_WEIGHTS) begin
116 |          afu_bus.csr.load_weights <= spl_bus.rw_rsp.data[0];
117 |       end
118 |    end
119 |    // num_cl_per_filter: bits [7:0] of the write data; no reset.
120 |    always_ff @(posedge clk) begin
121 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_NUM_CL_PER_FILTER) begin
122 |          afu_bus.csr.num_cl_per_filter <= spl_bus.rw_rsp.data[7:0];
123 |       end
124 |    end
125 |    // num_filters: bits [15:0] of the write data; no reset.
126 |    always_ff @(posedge clk) begin
127 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_NUM_FILTERS) begin
128 |          afu_bus.csr.num_filters <= spl_bus.rw_rsp.data[15:0];
129 |       end
130 |    end
131 |    // max_weight_buffer_addr: bits [15:0] of the write data; no reset.
132 |    always_ff @(posedge clk) begin
133 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_MAX_WEIGHT_BUFFER_ADDR) begin
134 |          afu_bus.csr.max_weight_buffer_addr <= spl_bus.rw_rsp.data[15:0];
135 |       end
136 |    end
137 |    // load_images: bit 0 of the write data; no reset.
138 |    always_ff @(posedge clk) begin
139 |       if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_LOAD_IMAGES) begin
140 |          afu_bus.csr.load_images <= spl_bus.rw_rsp.data[0];
141 |       end
142 |    end
143 |    // write_fence: bit 0 of the write data; cleared while resetb is low (reset DEFAULT in afu_csr.spec).
144 |    always_ff @(posedge clk) begin
145 |       if (~resetb) begin
146 |          afu_bus.csr.write_fence <= 0;
147 |       end else if (spl_bus.rw_rsp.cfg_valid && {spl_bus.rw_rsp.header[13:0], 2'b0} == ADDR_WRITE_FENCE) begin
148 |          afu_bus.csr.write_fence <= spl_bus.rw_rsp.data[0];
149 |       end
150 |    end
151 | 
152 | endmodule


--------------------------------------------------------------------------------
/gen/afu_csr.vh:
--------------------------------------------------------------------------------
 1 | // Code generated by afu_csr_gen
 2 | //
 3 | // Generated file: change afu_csr.spec and re-run make instead of editing by hand.
 4 | `ifndef AFU_CSR_VH
 5 | `define AFU_CSR_VH
 6 | // CSR address map. 64-bit registers get a low-word (L) and a high-word (H) address.
 7 | localparam ADDR_AFU_DSM_BASEL           = 16'h8a00;
 8 | localparam ADDR_AFU_DSM_BASEH           = 16'h8a04;
 9 | localparam ADDR_AFU_CNTXT_BASEL         = 16'h8a08;
10 | localparam ADDR_AFU_CNTXT_BASEH         = 16'h8a0c;
11 | localparam ADDR_AFU_EN                  = 16'h8a10;
12 | localparam ADDR_DOORBELL                = 16'h8a14;
13 | localparam ADDR_READ_BUFFER_LINES       = 16'h8a18;
14 | localparam ADDR_READ_BUFFER_BASEL       = 16'h8a1c;
15 | localparam ADDR_READ_BUFFER_BASEH       = 16'h8a20;
16 | localparam ADDR_WRITE_BUFFER_BASEL      = 16'h8a24;
17 | localparam ADDR_WRITE_BUFFER_BASEH      = 16'h8a28;
18 | localparam ADDR_UPDATE_DSM              = 16'h8a2c;
19 | localparam ADDR_PLL_RESET               = 16'h8a30;
20 | localparam ADDR_LOAD_WEIGHTS            = 16'h8a34;
21 | localparam ADDR_NUM_CL_PER_FILTER       = 16'h8a38;
22 | localparam ADDR_NUM_FILTERS             = 16'h8a3c;
23 | localparam ADDR_MAX_WEIGHT_BUFFER_ADDR  = 16'h8a40;
24 | localparam ADDR_LOAD_IMAGES             = 16'h8a44;
25 | localparam ADDR_WRITE_FENCE             = 16'h8a48;
26 | // Register values (presumably the type of afu_bus.csr used by afu_csr.sv - confirm in afu.vh).
27 | typedef struct
28 |   {
29 |    logic        afu_dsm_base_valid;     // set once the low word of afu_dsm_base has been written
30 |    logic [63:0] afu_dsm_base;
31 |    logic        afu_cntxt_base_valid;   // set once the low word of afu_cntxt_base has been written
32 |    logic [63:0] afu_cntxt_base;
33 |    logic        afu_en;
34 |    logic [31:0] doorbell;               // cleared by reset_doorbell
35 |    logic [31:0] read_buffer_lines;
36 |    logic [63:0] read_buffer_base;
37 |    logic [63:0] write_buffer_base;
38 |    logic [31:0] update_dsm;             // cleared by reset_update_dsm
39 |    logic        pll_reset;
40 |    logic        load_weights;
41 |    logic [7:0]  num_cl_per_filter;
42 |    logic [15:0] num_filters;
43 |    logic [15:0] max_weight_buffer_addr;
44 |    logic        load_images;
45 |    logic        write_fence;
46 |    logic        reset_doorbell;         // clear request read by afu_csr.sv; has no address of its own
47 |    logic        reset_update_dsm;       // clear request read by afu_csr.sv; has no address of its own
48 |    } afu_csr_t;
49 | 
50 | `endif


--------------------------------------------------------------------------------
/rtl/cacheline_buffer.sv:
--------------------------------------------------------------------------------
 1 | // cacheline_buffer: 512-bit-wide buffer with separate write and read clocks.
 2 | //
 3 | // The line is split across two ram_2p instances of 256 bits each. Both
 4 | // instances share the write port (wr_clk, wr_en, wr_addr) and the read port
 5 | // (rd_clk, rd_addr), so a write or read always covers the full 512 bits.
 6 | // NOTE(review): ram_2p is defined outside this file (rtl/qip); its read
 7 | // latency and read-during-write behaviour are not visible here.
 8 | module cacheline_buffer(
 9 |     // write side
10 |     input  logic         wr_clk,
11 |     input  logic         wr_en,
12 |     input  logic [7:0]   wr_addr,
13 |     input  logic [511:0] wr_data,
14 |     // read side
15 |     input  logic         rd_clk,
16 |     input  logic [7:0]   rd_addr,
17 |     output logic [511:0] rd_data
18 |     );
19 | 
20 |     // Bits [255:0] of the line.
21 |     ram_2p ram_2p_low(
22 |         .wrclock   (wr_clk),
23 |         .wren      (wr_en),
24 |         .wraddress (wr_addr),
25 |         .data      (wr_data[255:0]),
26 |         .rdclock   (rd_clk),
27 |         .rdaddress (rd_addr),
28 |         .q         (rd_data[255:0])
29 |     );
30 | 
31 |     // Bits [511:256] of the line.
32 |     ram_2p ram_2p_high(
33 |         .wrclock   (wr_clk),
34 |         .wren      (wr_en),
35 |         .wraddress (wr_addr),
36 |         .data      (wr_data[511:256]),
37 |         .rdclock   (rd_clk),
38 |         .rdaddress (rd_addr),
39 |         .q         (rd_data[511:256])
40 |     );
41 | 
42 | endmodule
43 | 


--------------------------------------------------------------------------------
/rtl/conv_forward_layer.sv:
--------------------------------------------------------------------------------
 1 | // conv_forward_layer: dot product of in_data and weight_vec.
 2 | //
 3 | // WIDTH float_mult instances form the element-wise products and a tree of
 4 | // float_add instances reduces them to a single value, which is registered
 5 | // onto out_data. The bias adder is currently commented out.
 6 | //
 7 | // connections[] layout: entries [WIDTH .. 2*WIDTH-1] hold the products, each
 8 | // adder computes connections[k] = connections[2k+1] + connections[2k], and
 9 | // connections[1] is the final sum. connections[0] is unused while the bias
10 | // adder is disabled.
11 | //
12 | // Ports:
13 | //   clk        - clock for the arithmetic instances and the output registers
14 | //   reset      - drives float_add aclr; float_mult clk_en is !reset
15 | //   id         - tag copied to id_out
16 | //   in_data    - WIDTH 32-bit operands
17 | //   weight_vec - WIDTH 32-bit operands
18 | //   out_data   - registered copy of the reduced sum
19 | //   id_out     - id delayed by one clock
20 | //
21 | // NOTE(review): the reduction looks like it assumes WIDTH is a power of two
22 | // for a balanced tree - confirm before using other widths.
23 | // NOTE(review): id_out lags id by a single clock, whereas out_data also
24 | // passes through the multiplier/adder instances - confirm the intended alignment.
25 | module conv_forward_layer #(parameter WIDTH = 8)
26 |     (
27 |         input  logic        clk,
28 |         input  logic        reset,
29 |         input  logic [7:0]  id,
30 |         input  logic [31:0] in_data    [WIDTH-1:0],
31 |         input  logic [31:0] weight_vec [WIDTH-1:0],
32 | //      input  logic [31:0] bias_term,
33 |         output logic [31:0] out_data,
34 |         output logic [7:0]  id_out
35 |     );
36 | 
37 |     logic [31:0] connections [2*WIDTH];
38 | 
39 |     genvar i, j;
40 |     generate
41 |         // One multiplier per element: in_data[i] * weight_vec[i].
42 |         for (i = 0; i < WIDTH; i++) begin : GEN_MULTS
43 |             float_mult float_mult_inst(
44 |                 .clk_en(!reset),
45 |                 .clock(clk),
46 |                 .dataa(in_data[i]),
47 |                 .datab(weight_vec[i]),
48 |                 .result(connections[i+WIDTH])
49 |             );
50 |         end
51 |         // Sum the products pairwise, halving the count per level, down to connections[1].
52 |         for (i = WIDTH; i > 1; i = i / 2) begin : GEN_SUMS
53 |             for (j = i; j > i/2 && j != 1; j--) begin : SUM_MULTS
54 |                 float_add float_add_inst(
55 |                     .aclr(reset),
56 |                     .clock(clk),
57 |                     .dataa(connections[2*j-1]),
58 |                     .datab(connections[2*j-2]),
59 |                     .result(connections[j-1])
60 |                 );
61 |             end
62 |         end
63 |     endgenerate
64 | 
65 |     // Bias stage, disabled together with the bias_term port above.
66 | //  float_add float_add_bias_term(
67 | //      .aclr(reset),
68 | //      .clock(clk),
69 | //      .dataa(connections[1]),
70 | //      .datab(bias_term),
71 | //      .result(connections[0])
72 | //  );
73 | 
74 |     // Register the result and pass the id along.
75 |     always_ff @(posedge clk) begin
76 |         out_data <= connections[1];
77 |         // Nonblocking, like out_data: a blocking '=' in a clocked block can race
78 |         // with other processes that read id_out on the same edge.
79 |         id_out   <= id;
80 |     end
81 | 
82 | endmodule


--------------------------------------------------------------------------------
/rtl/conv_forward_layer_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/100ps
 2 | 
 3 | // Testbench for conv_forward_layer.
 4 | // Replays NUM_TESTS vectors from the included test-data file and counts the
 5 | // outputs whose raw 32-bit difference from test_output is 0xff or more.
 6 | // test_input, test_weights, test_bias and test_output are presumably declared
 7 | // by the included .vh file - they are not defined in this module.
 8 | module conv_forward_layer_tb();
 9 | 	// NOTE(review): absolute, machine-specific path; the repository also carries
10 | 	// this file as test/test_data/conv_forward_test_data.vh.
11 | 	`include "/home/b/FPGA-CNN/test/test_data/conv_forward_test_data.vh"
12 | 	parameter CYCLE			= 5;				//clk period: 5ns = 200 Mhz
13 | 	parameter MULT_DELAY		= 5;				//#clks to complete a mult
14 | 	parameter ADD_DELAY		= 7;				//#clks to complete an add
15 | 	parameter WIDTH			= 8;				//input vector width
16 | 
17 | 	parameter NUM_TESTS		= 5000;
18 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH;
19 | 
20 | 	reg clk, reset;
21 | 	logic [31:0] in_vec [WIDTH-1:0];			//input vec to module
22 | 	logic [31:0] weight_vec [WIDTH-1:0];	//weight vec to module
23 | 	logic [31:0] bias_term;						//loaded from test_bias; not connected while the DUT has no bias port
24 | 	logic [31:0] out;								//output from module
25 | 	int i, j, num_errors, num_add_levels, delay;
26 | 
27 | 	//initialize clk
28 | 	initial begin
29 | 		clk = 0;
30 | 	end
31 | 
32 | 	//forever cycle the clk
33 | 	always begin
34 | 		#(CYCLE/2.0) clk = ~clk;
35 | 	end
36 | 
37 | 	//instantiate the module
38 | 	//The DUT's bias_term port is commented out in conv_forward_layer.sv, so it
39 | 	//must not be connected here (a named connection to a missing port fails elaboration).
40 | 	//NOTE(review): confirm test_output was generated without the bias term.
41 | 	conv_forward_layer 	#(.WIDTH(WIDTH))
42 | 		conv_forward_inst(
43 | 								.clk(clk),
44 | 								.reset(reset),
45 | 								.id(8'b0),
46 | 								.in_data(in_vec),
47 | 								.weight_vec(weight_vec),
48 | 								.out_data(out)
49 | 								);
50 | 
51 | 	initial begin
52 | 		reset = 0;
53 | 		num_errors = 0;
54 | 		//number of adder levels = log2(WIDTH); the previous search loop only
55 | 		//produced the right value for WIDTH <= 8 (it gave 5 for WIDTH = 16)
56 | 		num_add_levels = $clog2(WIDTH);
57 | 		//calculate total delay of one calculation
58 | 		//1 mult delay, log2(WIDTH) add delays to sum products, 1 add delay for bias term
59 | 		delay = CYCLE*(MULT_DELAY + ADD_DELAY*(num_add_levels + 1) + 1);
60 | 
61 | 		$display("num add levels: %d", num_add_levels);
62 | 		//for all test cases
63 | 		for (i = 0; i < MEM_SIZE; i = i + WIDTH) begin
64 | 			//copy each value to input vector
65 | 			for (j = 0; j < WIDTH; j++) begin
66 | 				in_vec[j] = test_input[i+j];
67 | 			end
68 | 			//copy each value to weight vector
69 | 			for (j = 0; j < WIDTH; j++) begin
70 | 				weight_vec[j] = test_weights[i+j];
71 | 			end
72 | 			//copy bias term
73 | 			bias_term = test_bias[i/WIDTH];
74 | 
75 | 			//wait for computation to finish
76 | 			#(delay)
77 | 
78 | 			//if we were wrong, check for rounding error
79 | 			if( out != test_output[i/WIDTH] ) begin
80 | 				//if the number was off because of a rounding error, ignore
81 | 				if ( out - test_output[i/WIDTH] < 32'h000000ff ||
82 | 						test_output[i/WIDTH] - out < 32'h000000ff ) begin
83 | 					//ignore
84 | 				//otherwise, complain
85 | 				end else begin
86 | 					assert( out == test_output[i/WIDTH] );
87 | 					$display("output: %h\tcalculated: %h", out, test_output[i/WIDTH]);
88 | 					num_errors++;
89 | 				end
90 | 			end
91 | 			$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
92 | 		end
93 | 		$display("############################################\n");
94 | 		$display("Testing complete!\n");
95 | 		$display("%d of %d tests passed\n", NUM_TESTS-num_errors, NUM_TESTS);
96 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
97 | 		$display("############################################\n");
98 | 		//the free-running clock would otherwise keep the simulation alive forever
99 | 		$finish;
100 | 	end
101 | 
102 | endmodule
103 | 


--------------------------------------------------------------------------------
/rtl/inner_product_backward.sv:
--------------------------------------------------------------------------------
 1 | /* Sophia Zhang
 2 |  * ECE 44x Senior Design
 3 |  * Block: Inner Product Layer (Backward)
 4 |  * File Name: inner_product_backward.sv
 5 |  * Module: Inner Product Layer (Backward)
 6 |  * Description: Multiplies each of the WIDTH elements of in_data by the
 7 |  * matching element of weights (float_mult), sums the products with a tree of
 8 |  * float_add instances, adds bias with one more float_add, and registers the
 9 |  * result onto out_data. out_id is in_id delayed by one clock.
10 |  *
11 |  * connections[] layout: [WIDTH .. 2*WIDTH-1] are the products, each adder
12 |  * computes connections[k] = connections[2k+1] + connections[2k],
13 |  * connections[1] is the sum of products and connections[0] is sum + bias.
14 |  */
15 | 
16 | module ip_backward #(parameter WIDTH = 8)
17 |     (
18 |         input  logic        clk,                    // clock
19 |         input  logic        reset,                  // float_add aclr; float_mult clk_en is !reset
20 |         input  logic [31:0] in_data [WIDTH-1:0],    // input vector
21 |         input  logic [31:0] weights [WIDTH-1:0],    // weight vector
22 |         input  logic [31:0] bias,                   // added to the sum of products
23 |         input  logic [7:0]  in_id,                  // tag forwarded to out_id
24 |         output logic [31:0] out_data,               // registered result
25 |         output logic [7:0]  out_id
26 |     );
27 | 
28 |     logic [31:0] connections [2*WIDTH];
29 | 
30 |     genvar i, j;
31 |     generate
32 |         // Product stage: one multiplier per vector element.
33 |         for (i = 0; i < WIDTH; i = i + 1) begin : GEN_MULTS
34 |             float_mult floating_mult_inst(
35 |                 .clock  (clk),
36 |                 .clk_en (!reset),
37 |                 .dataa  (in_data[i]),
38 |                 .datab  (weights[i]),
39 |                 .result (connections[i + WIDTH])
40 |             );
41 |         end
42 | 
43 |         // Reduction stage: each level adds neighbouring pairs until one value is left.
44 |         for (i = WIDTH; i > 1; i = i / 2) begin : GEN_SUMS
45 |             for (j = i; (j != 1) && (j > i / 2); j = j - 1) begin : SUM_MULTS
46 |                 float_add float_add_inst(
47 |                     .clock  (clk),
48 |                     .aclr   (reset),
49 |                     .dataa  (connections[2*j-1]),
50 |                     .datab  (connections[2*j-2]),
51 |                     .result (connections[j-1])
52 |                 );
53 |             end
54 |         end
55 |     endgenerate
56 | 
57 |     // Bias stage: sum of products plus bias.
58 |     float_add float_add_bias_term(
59 |         .clock  (clk),
60 |         .aclr   (reset),
61 |         .dataa  (connections[1]),
62 |         .datab  (bias),
63 |         .result (connections[0])
64 |     );
65 | 
66 |     // Output registers.
67 |     always_ff @(posedge clk) begin
68 |         out_id   <= in_id;
69 |         out_data <= connections[0];
70 |     end
71 | 
72 | endmodule
73 | 


--------------------------------------------------------------------------------
/rtl/inner_product_backward_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/100ps
 2 | 
 3 | // Testbench for ip_backward.
 4 | // Replays NUM_TESTS vectors from the included test-data file and counts the
 5 | // outputs whose raw 32-bit difference from test_output is 0xff or more.
 6 | // test_input, test_weights, test_bias and test_output are presumably declared
 7 | // by the included .vh file - they are not defined in this module.
 8 | module inner_product_backward_tb();
 9 | 	//`include "/nfs/stak/students/z/zhangso/ECE441/inner_product_backward/test_data/ip_backward_test_data.vh"
10 | 	// NOTE(review): absolute, machine-specific path; the repository also carries
11 | 	// this file as test/test_data/inner_product_backward_test_data.vh.
12 | 	`include "/home/b/FPGA-CNN/test/test_data/inner_product_backward_test_data.vh"
13 | 
14 | 	parameter CYCLE			= 5;				//clk period in ns
15 | 	parameter MULT_DELAY		= 5;				//#clks allowed for a mult
16 | 	parameter ADD_DELAY		= 7;				//#clks allowed for an add
17 | 	parameter WIDTH			= 8;				//input vector width
18 | 
19 | 	parameter NUM_TESTS		= 5000;
20 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH;
21 | 
22 | 	reg clk, reset;
23 | 	logic [31:0] in_vec [WIDTH-1:0];	//input vec to module
24 | 	logic [31:0] weight_vec [WIDTH-1:0];	//weight vec to module
25 | 	logic [31:0] bias_term;			//bias term to module
26 | 	logic [31:0] out;			//output from module
27 | 	int i, j, num_errors, num_add_levels, delay;
28 | 
29 | 	//initialize clk
30 | 	initial begin
31 | 		clk = 0;
32 | 	end
33 | 
34 | 	//forever cycle the clk
35 | 	always begin
36 | 		#(CYCLE/2.0) clk = ~clk;
37 | 	end
38 | 
39 | 	//instantiate the module (out_id is left unconnected)
40 | 	ip_backward 	#(.WIDTH(WIDTH))
41 | 		ip_backward_inst(
42 | 								.clk(clk),
43 | 								.reset(reset),
44 | 								.in_data(in_vec),
45 | 								.weights(weight_vec),
46 | 								.bias(bias_term),
47 | 								.in_id(8'b0),
48 | 								.out_data(out)
49 | 								);
50 | 
51 | 	initial begin
52 | 		reset = 0;
53 | 		num_errors = 0;
54 | 		//number of adder levels = log2(WIDTH); the previous search loop only
55 | 		//produced the right value for WIDTH <= 8 (it gave 5 for WIDTH = 16)
56 | 		num_add_levels = $clog2(WIDTH);
57 | 		//calculate total delay of one calculation
58 | 		//1 mult delay, log2(WIDTH) add delays to sum products, 1 add delay for bias term
59 | 		delay = CYCLE*(MULT_DELAY + ADD_DELAY*(num_add_levels + 1) + 1);
60 | 
61 | 		$display("num add levels: %d", num_add_levels);
62 | 		//for all test cases
63 | 		for (i = 0; i < MEM_SIZE; i = i + WIDTH) begin
64 | 			//copy each value to input vector
65 | 			for (j = 0; j < WIDTH; j++) begin
66 | 				in_vec[j] = test_input[i+j];
67 | 			end
68 | 			//copy each value to weight vector
69 | 			for (j = 0; j < WIDTH; j++) begin
70 | 				weight_vec[j] = test_weights[i+j];
71 | 			end
72 | 			//copy bias term
73 | 			bias_term = test_bias[i/WIDTH];
74 | 
75 | 			//wait for computation to finish
76 | 			#(delay)
77 | 
78 | 			$display("output: %h\tcalculated: %h", out, test_output[i/WIDTH]);
79 | 			if( out != test_output[i/WIDTH] ) begin
80 | 				//if the number was off because of a rounding error, ignore
81 | 				if ( out - test_output[i/WIDTH] < 32'h000000ff ||
82 | 						test_output[i/WIDTH] - out < 32'h000000ff ) begin
83 | 					//ignore
84 | 				//otherwise, complain
85 | 				end else begin
86 | 					assert( out == test_output[i/WIDTH] );
87 | 					$display("output: %h\tcalculated: %h", out, test_output[i/WIDTH]);
88 | 					num_errors++;
89 | 				end
90 | 			end
91 | 		end
92 | 		$display("############################################\n");
93 | 		$display("Testing complete!\n");
94 | 		$display("%d of %d tests passed\n", NUM_TESTS-num_errors, NUM_TESTS);
95 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
96 | 		$display("############################################\n");
97 | 		//the free-running clock would otherwise keep the simulation alive forever
98 | 		$finish;
99 | 	end
100 | 
101 | endmodule
102 | 


--------------------------------------------------------------------------------
/rtl/inner_product_forward.sv:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ECE 44x Senior Design
 3 |  * Block: Inner Product Layer (Forward)
 4 |  * File Name: inner_product_forward.sv
 5 |  * Module: Inner Product Layer (Forward)
 6 |  * Description: The inner product layer (forward) is the dot product of the
 7 |  * weight vector and an input vector. Each of the WIDTH element pairs is
 8 |  * multiplied with float_mult, the products are summed with a tree of
 9 |  * float_add instances, and the sum is registered onto out_data. This module
10 |  * has no bias input. out_id is in_id delayed by one clock.
11 |  *
12 |  * connections[] layout: [WIDTH .. 2*WIDTH-1] are the products, each adder
13 |  * computes connections[k] = connections[2k+1] + connections[2k], and
14 |  * connections[1] is the final sum (connections[0] is unused).
15 |  * NOTE(review): the reduction looks like it assumes WIDTH is a power of two
16 |  * for a balanced tree - confirm before using other widths.
17 |  */
18 | 
19 | module ip_forward#(parameter WIDTH = 8)
20 | 				(
21 | 					input logic clk,	//clock signal
22 | 					input logic reset,	//drives float_add aclr; float_mult clk_en is !reset
23 | 					input logic [31:0] in_data [WIDTH-1:0], //input data
24 | 					input logic [31:0] weights [WIDTH-1:0], //used in dot product
25 | 					input logic [7:0] in_id,	//tag forwarded to out_id
26 | 					output logic [31:0] out_data, //output data
27 | 					output logic [7:0] out_id
28 | 				);
29 | 
30 | 	logic [31:0] connections [2*WIDTH];
31 | 	genvar i, j;
32 | 	generate
33 | 		//create float_mult blocks to multiply the WIDTH number of inputs by the weights
34 | 		//The multiplier IP is named float_mult (rtl/qip/float_mult.v), as used by the
35 | 		//other layers; "floating_mult" is not a module in this project. The instance
36 | 		//name is kept so hierarchical paths do not change.
37 | 		for (i = 0; i < WIDTH; i++) begin : GEN_MULTS
38 | 			float_mult floating_mult_inst(
39 | 					.clk_en(!reset),
40 | 					.clock(clk),
41 | 					.dataa(in_data[i]),
42 | 					.datab(weights[i]),
43 | 					.result(connections[i+WIDTH])
44 | 				);
45 | 		end
46 | 
47 | 		//add the products and reduce to a single value
48 | 		for (i = WIDTH; i > 1; i = i / 2) begin : GEN_SUMS
49 | 			for (j = i; j > i / 2 && j != 1; j--) begin : SUM_MULTS
50 | 				float_add float_add_inst(
51 | 					.aclr(reset),
52 | 					.clock(clk),
53 | 					.dataa(connections[2*j-1]),
54 | 					.datab(connections[2*j-2]),
55 | 					.result(connections[j-1])
56 | 				);
57 | 			end
58 | 		end
59 | 	endgenerate
60 | 
61 | 	//register the sum and forward the id
62 | 	always @(posedge clk) begin
63 | 		out_data <= connections[1];
64 | 		out_id <= in_id;
65 | 	end
66 | 
67 | endmodule
68 | 


--------------------------------------------------------------------------------
/rtl/inner_product_forward_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/100ps
 2 | 
 3 | module inner_product_forward_tb();
 4 | 	`include "/nfs/stak/students/z/zhangso/ECE441/inner_product_forward/test_data/ip_forward_test_data.vh"
 5 | 	parameter CYCLE			= 5;			//clk period in timescale units (ns)
 6 | 	parameter MULT_DELAY		= 5;			//#clks budgeted for one multiply
 7 | 	parameter ADD_DELAY		= 7;			//#clks budgeted for one add
 8 | 	parameter WIDTH			= 8;			//elements per input/weight vector
 9 | 	//NOTE(review): the include above is an absolute path into one user's home directory
10 | 	parameter NUM_TESTS		= 5000;
11 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH; 
12 | 
13 | 	reg clk, reset;
14 | 	logic [31:0] in_vec [WIDTH-1:0];	//input vec to module
15 | 	logic [31:0] weight_vec [WIDTH-1:0];	//weight vec to module
16 | 	//logic [31:0] bias_term;			//bias term to module
17 | 	logic [31:0] out;			//output from module
18 | 	int id, i, j, num_errors, num_add_levels, delay;
19 | 	
20 | 	//initialize clk
21 | 	initial begin
22 | 		clk = 0;
23 | 	end
24 | 	
25 | 	//forever cycle the clk
26 | 	always begin
27 | 		#(CYCLE/2.0) clk = ~clk;
28 | 	end
29 | 	
30 | 	//instantiate the module 
31 | 	ip_forward 	#(.WIDTH(WIDTH))
32 | 		inner_product_forward_inst(
33 | 								.clk(clk),
34 | 								.reset(reset),
35 | 								.in_data(in_vec),
36 | 								.weights(weight_vec),
37 | 								.in_id(8'b0),
38 | 								.out_data(out),
39 | 								.out_id()		//tag output is not checked by this testbench
40 | 								);
41 | 
42 | 	initial begin
43 | 		reset = 0;
44 | 		num_errors = 0;
45 | 		//adder levels in the reduction tree = ceil(log2(WIDTH)); the old loop tested
46 | 		//WIDTH/(2*n), which equals log2 only for WIDTH = 2, 4, 8 and hangs for WIDTH = 1
47 | 		num_add_levels = $clog2(WIDTH);
48 | 		//calculate total delay of one calculation 
49 | 		//1 mult delay, log2(WIDTH) add delays to sum products,
50 | 		//plus 2 extra clks of margin (no bias adder is instantiated)
51 | 		delay = CYCLE*(MULT_DELAY + ADD_DELAY*(num_add_levels) + 2);
52 | 		
53 | 		$display("num add levels: %d", num_add_levels);
54 | 		//for all test cases
55 | 		 for (i = 0; i < MEM_SIZE; i = i + WIDTH) begin
56 | 			//copy each value to input vector
57 | 			for (j = 0; j < WIDTH; j++) begin
58 | 				in_vec[j] = test_input[i+j];
59 | 			end
60 | 			//copy each value to weight vector
61 | 			for (j = 0; j < WIDTH; j++) begin
62 | 				weight_vec[j] = test_weights[i+j];
63 | 			end
64 | 			//copy bias term 
65 | 			//bias_term = test_bias[i/WIDTH];
66 | 			
67 | 			//wait for computation to finish
68 | 			#(delay)
69 | 			
70 | 			$display("output: %h\tcalculated: %h", out, test_output[i/WIDTH]);
71 | 			assert( out == test_output[i/WIDTH] );
72 | 			//if we were wrong, increase error count
73 | 			if( out != test_output[i/WIDTH] ) begin
74 | 				num_errors++;
75 | 			end
76 | 		end
77 | 		$display("############################################\n");
78 | 		$display("Testing complete!\n");
79 | 		$display("%d of %d tests passed\n", NUM_TESTS-num_errors, NUM_TESTS);
80 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
81 | 		$display("############################################\n");
82 | 		$finish;	//end the run; the free-running clk would otherwise keep the simulation alive
83 | 	end
84 | 
85 | endmodule
86 | 


--------------------------------------------------------------------------------
/rtl/loss_layer_tb.sv:
--------------------------------------------------------------------------------
  1 | `timescale 1ns/100ps
  2 | 
  3 | module loss_layer_tb();
  4 | 	`include "/home/b/bear_git/FPGA-CNN/test/test_data/softmax_with_loss_test_data.vh"
  5 | 	parameter CYCLE			= 5;				//clk period: 5ns = 200 Mhz
  6 | 	parameter MULT_DELAY		= 5;				//#clks to complete a mult
  7 | 	parameter ADD_DELAY		= 7;				//#clks to complete an add
  8 | 	parameter SUB_DELAY		= 7;				//#clks to complete a sub
  9 | 	parameter EXP_DELAY		= 17;				//#clks to complete an exponential
 10 | 	parameter LOG_DELAY		= 21;				//#clks to complete a log
 11 | 	parameter DIV_DELAY		= 6;				//#clks to complete a div
 12 | 	parameter WIDTH			= 8;				//input vector width
 13 | 	
 14 | 	parameter NUM_TESTS		= 10000;
 15 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH; 
 16 | 
 17 | 	reg clk, reset;
 18 | 	logic [31:0] in_vec [WIDTH-1:0];			//input vec to module
 19 | 	logic [31:0] label;							//correct classification
 20 | 	logic [7:0]  id;								//identification value
 21 | 	logic [31:0] out;								//output from module
 22 | 	logic 		 f_overall_sum;
 23 | 	int i, j, num_errors, num_add_levels, delay, sub_exp_add_delay, div_log_delay;
 24 | 	
 25 | 	//initialize clk
 26 | 	initial begin
 27 | 		clk = 0;
 28 | 	end
 29 | 	
 30 | 	//forever cycle the clk
 31 | 	always begin
 32 | 		#(CYCLE/2.0) clk = ~clk;
 33 | 	end
 34 | 	
 35 | 	//instantiate the module 
 36 | 	lol_opt 	#(.WIDTH(WIDTH))
 37 | 		lol_opt_inst(
 38 | 								.clk(clk),
 39 | 								.reset_n(reset),
 40 | 								.in_ID(id),
 41 | 								.f_overall_sum(f_overall_sum),
 42 | 								.all_clsf(in_vec),
 43 | 								.corr_clsf(label),
 44 | 								.data_out(out)
 45 | 								);
 46 | 
 47 | 	initial begin
 48 | 		reset = 1;
 49 | 		id = 0;
 50 | 		f_overall_sum = 0;
 51 | 		num_errors = 0;
 52 | 		//adder levels in the reduction tree = ceil(log2(WIDTH)); the old loop tested
 53 | 		//WIDTH/(2*n), which equals log2 only for WIDTH = 2, 4, 8 and hangs for WIDTH = 1
 54 | 		num_add_levels = $clog2(WIDTH);
 55 | 		//calculate total delay of one calculation 
 56 | 		sub_exp_add_delay = CYCLE*(SUB_DELAY + EXP_DELAY + ADD_DELAY*(num_add_levels));
 57 | 		div_log_delay = CYCLE*(DIV_DELAY + LOG_DELAY-1);
 58 | 		
 59 | 		$display("num add levels: %d", num_add_levels);
 60 | 		//for all test cases
 61 | 		 for (i = 0; i < MEM_SIZE; i = i + WIDTH) begin
 62 | 			//reset module (reset drives the active-low reset_n port)
 63 | 			reset = 0;
 64 | 			#CYCLE reset = 1;
 65 | 			
 66 | 			//copy each value to input vector
 67 | 			for (j = 0; j < WIDTH; j++) begin
 68 | 				in_vec[j] = test_input[i+j];
 69 | 			end
 70 | 			
 71 | 			//copy label
 72 | 			label = test_label[i/WIDTH];
 73 | 			
 74 | 			//wait for computation to finish 
 75 | 			#(sub_exp_add_delay)
 76 | 			f_overall_sum = 1;
 77 | 			#CYCLE
 78 | 			f_overall_sum = 0;
 79 | 			//add to overall sum
 80 | 			#(CYCLE*ADD_DELAY)
 81 | 			//div and log
 82 | 			#(div_log_delay)
 83 | 			
 84 | 			//if we were wrong, check for rounding error
 85 | 			if( out != test_output[i/WIDTH] ) begin
 86 | 				//if log(1.0) in NumPy gave us garbage, do our own check	
 87 | 				if ( test_div[i/WIDTH] == 32'h3f800000 ) begin
 88 | 					if ( out != 32'h80000000 ) begin
 89 | 						$display("Error! Module did not correctly handle log(1.0)");
 90 | 						$display("output: %h\tcalculated: 32'h80000000", out);
 91 | 						num_errors++;	//a reported error must also show up in the pass/fail totals
 92 | 					end
 93 | 				//if the number was off because of a rounding error, ignore
 94 | 				end else if ( out - test_output[i/WIDTH] < 32'h0000ffff ||
 95 | 									test_output[i/WIDTH] - out < 32'h0000ffff ) begin
 96 | 						//$display("Rounding error");
 97 | 				//otherwise, complain
 98 | 				end else begin
 99 | 					//assert( out == test_output[i/WIDTH] );
100 | 					$display("Error! Module result not expected value");
101 | 					$display("output: %h\tcalculated: %h", out, test_output[i/WIDTH]);
102 | 					$display("out&:\t\t%b", out & 32'hfffff000);
103 | 					$display("corr&:\t\t%b", test_output[i/WIDTH] & 32'hfffff000);
104 | 					$display("out-corr:\t\t%b", out - test_output[i/WIDTH]);
105 | 					$display("corr-out:\t\t%b", test_output[i/WIDTH] - out);
106 | 					num_errors++;
107 | 				end
108 | 			end
109 | 			$display("(%f percent)\n", 100.0*((i/WIDTH)+1-num_errors)/((i/WIDTH)+1));
110 | 		end
111 | 		$display("############################################\n");
112 | 		$display("Testing complete!\n");
113 | 		$display("%d of %d tests passed\n", NUM_TESTS-num_errors, NUM_TESTS);
114 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
115 | 		$display("############################################\n");
116 | 		$finish;	//end the run; the free-running clk would otherwise keep the simulation alive
117 | 	end
118 | 
119 | endmodule
120 | 


--------------------------------------------------------------------------------
/rtl/loss_opt.sv:
--------------------------------------------------------------------------------
  1 | /* Author: Youthawin Philavastvanid 
  2 |  * Date  : 02/08/2016 
  3 |  * 
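  4 |  * Module: lol_opt (file loss_opt.sv)
  5 |  * Desc  : Subtracts corr_clsf from every all_clsf entry, exponentiates and sums the
  6 |  *         differences, accumulates that sum, and outputs the negated log of 1.0 / sum.
  7 |  * Design:  
  8 |  * Input : all_clsf (WIDTH 32-bit values), corr_clsf, f_overall_sum, in_ID
  9 |  * Output: data_out (32 bits), out_ID
 10 |  *
 11 |  * Timeline: 
 12 |  
 13 |  * WARNING: (neither signal named below is a port of the current module)
 14 | 	f_inc_idx should be a pulse one clock cycle wide indicating it is time to increment the idx of the adder
 15 | 	f_inc_idx_exp should be a pulse one clock cycle wide indicating it is time to increment the idx of the e^( z_correctClassification )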
 16 | */
 17 | 
 18 | module lol_opt#(
 19 | 	parameter 				WEIGHT=1,	//not referenced inside this module
 20 | 							WIDTH=8	//number of 32-bit classification inputs
 21 | 					 )(
 22 | 
 23 | 	input					reset_n,	//active-low reset, inverted onto every aclr
 24 | 	input 					clk,		//clock shared by all arithmetic blocks
 25 | 	input					f_overall_sum,				//when high, overall_sum latches the accumulate adder output
 26 | 	input		[31:0]		all_clsf [WIDTH-1:0],	//calculated classifications
 27 | 	input		[31:0]		corr_clsf,				//correct classification
 28 | 	input reg	[7:0]		in_ID,					//tag, forwarded unchanged
 29 | 
 30 | 	output reg	[7:0]		out_ID,					//combinational copy of in_ID
 31 | 	output reg  [31:0]		data_out	//flt_log output with bit 31 inverted
 32 | 
 33 | ); 
 34 | 
 35 | 	//Datapath, in order: subtract corr_clsf from every input, feed each difference
 36 | 	//to flt_exp, add the results in a tree, accumulate with flt_add_new, divide
 37 | 	//32'h3f800000 by the accumulated value, pass that to flt_log, invert bit 31.
 38 | 	//---- datapath signals ----
 39 | 	reg [31:0] sub_result [WIDTH-1:0];		//all_clsf[k] - corr_clsf
 40 | 	logic [31:0] connections [2*WIDTH] ;	//[WIDTH..2*WIDTH-1]: flt_exp outputs, [1..WIDTH-1]: partial sums
 41 | 	reg [31:0] current_sum;					//alias of connections[1]
 42 | 	reg [31:0] overall_sum;					//accumulator register
 43 | 	reg [31:0] buff_overall_sum;			//flt_add_new output: current_sum + overall_sum
 44 | 	reg [31:0] div_ecorr_sumall;			//flt_div_new output
 45 | 	reg [31:0] buff_out_div;				//flt_log output
 46 | 
 47 | 	//---- declared but never referenced below ----
 48 | 	reg [31:0] corr_clsf_sub_result;
 49 | 	reg [31:0] sum_e_all_clsf;
 50 | 	reg [31:0] buff_sum_e_all_clsf;
 51 | 	reg [31:0] e_all_clsf	   [WIDTH-1:0];
 52 | 	reg [15:0] idx_e_all_clsf;
 53 | 	reg 	   f_set_inc_idx_e_all;
 54 | 	reg [31:0] e_corr_clsf;
 55 | 	reg [31:0] buff_out;
 56 | 
 57 | 	genvar k, n;
 58 | 	generate 
 59 | 		//stage 1: one flt_sub per input, sub_result[k] = all_clsf[k] - corr_clsf
 60 | 		for (k = 0; k < WIDTH; k++) begin : GEN_SUBS
 61 | 			flt_sub flt_sub_inst(
 62 | 				.clock	(clk),
 63 | 				.aclr	(!reset_n),
 64 | 				.dataa	(all_clsf[k]),
 65 | 				.datab	(corr_clsf),
 66 | 				.result	(sub_result[k])
 67 | 			);
 68 | 		end 
 69 | 		//stage 2: one flt_exp per difference; results land in the upper half of
 70 | 		//connections, where the adder tree below picks them up
 71 | 		for (k = 0; k < WIDTH; k++) begin : GEN_EXPS
 72 | 			flt_exp flt_exp_inst(
 73 | 				.clock	(clk),
 74 | 				.aclr	(!reset_n),
 75 | 				.data	(sub_result[k]),
 76 | 				.result	(connections[WIDTH+k])
 77 | 			);
 78 | 		end 
 79 | 		//stage 3: adder tree, node n-1 = node 2n-1 + node 2n-2; the total ends up in connections[1]
 80 | 		for (k = WIDTH; k > 1; k = k / 2) begin : GEN_SUMS
 81 | 			for (n = k; n > k/2 && n != 1; n--) begin : SUM_MULTS
 82 | 				flt_add flt_add_inst(
 83 | 					.clock	(clk),
 84 | 					.aclr	(!reset_n),
 85 | 					.dataa	(connections[2*n-1]),
 86 | 					.datab	(connections[2*n-2]),
 87 | 					.result	(connections[n-1])
 88 | 				);
 89 | 			end
 90 | 		end
 91 | 	endgenerate
 92 | 
 93 | 	//forwarding the ID
 94 | 	assign out_ID = in_ID;
 95 | 	//the tree total feeds the accumulate adder
 96 | 	assign current_sum = connections[1];
 97 | 
 98 | 	//accumulate: buff_overall_sum = current_sum + overall_sum
 99 | 	flt_add_new flt_add_overall_sum( 
100 | 		.clock	(clk),
101 | 		.aclr	(!reset_n),
102 | 		.dataa	(current_sum),
103 | 		.datab	(overall_sum),
104 | 		.result	(buff_overall_sum)
105 | 	);
106 | 
107 | 	//overall_sum is cleared asynchronously by reset_n and takes a new value only
108 | 	//on a clock edge where f_overall_sum is high; otherwise it keeps its value
109 | 	always_ff @(posedge clk, negedge reset_n) begin
110 | 		if (!reset_n)
111 | 			overall_sum <= 0;
112 | 		else if (f_overall_sum)
113 | 			overall_sum <= buff_overall_sum;
114 | 	end
115 | 
116 | 	//Dividing -- 32'h3f800000 / buff_overall_sum													//[+6]
117 | 	flt_div_new flt_div_inst(
118 | 		.clock	(clk),
119 | 		.aclr	(!reset_n),
120 | 		.dataa	(32'h3f800000),
121 | 		.datab	(buff_overall_sum),
122 | 		.result	(div_ecorr_sumall)		);
123 | 
124 | 	//Taking the log of the quotient															//[+21]
125 | 	flt_log flt_log(
126 | 		.clock	(clk),
127 | 		.aclr	(!reset_n),
128 | 		.data	(div_ecorr_sumall),
129 | 		.result	(buff_out_div)			);
130 | 
131 | 	//Multiply (-1): invert the sign bit (bit 31), leave bits 30:0 untouched
132 | 	assign data_out = {~buff_out_div[31], buff_out_div[30:0]};
133 | 
134 | endmodule
135 | 
136 | 


--------------------------------------------------------------------------------
/rtl/pooling_backward_layer_tb.sv:
--------------------------------------------------------------------------------
  1 | `timescale 1ns/100ps
  2 | `define DEBUG 1
  3 | module pooling_backward_layer_tb();
  4 | 	`include "/home/b/bear_git/FPGA-CNN/test/test_data/pooling_backward_test_data.vh"
  5 | 	parameter CYCLE			= 5;
  6 | 	parameter MULT_DELAY		= 5;
  7 | 	parameter KERNEL_WIDTH	= 3;
  8 | 	parameter KERNEL_HEIGHT	= 3;
  9 | 	parameter WIDTH			= KERNEL_WIDTH*KERNEL_HEIGHT;
 10 | 	
 11 | 	parameter NUM_TESTS		= 5000;
 12 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH;
 13 | 
 14 | 	
 15 | 	reg clk, reset;
 16 | 	logic [31:0] in_vec			[WIDTH-1:0];			//input vec to module
 17 | 	logic [7:0]	 in_idx;
 18 | 	logic [31:0] in_err_term;
 19 | 	logic [31:0] out_data		[WIDTH-1:0];				//output from module
 20 | 	int i, j, k, num_errors, num_depth, delay;
 21 | 
 22 | 	//initialize clk
 23 | 	initial begin
 24 | 		clk = 0;
 25 | 	end
 26 | 
 27 | 	//forever cycle the clk
 28 | 	always begin
 29 | 		#(CYCLE/2.0) clk = ~clk;
 30 | 	end
 31 | 
 32 | 	//instantiate the module
 33 | 	pooling_backward_opt #( .k_w(KERNEL_WIDTH), .k_h(KERNEL_HEIGHT) )
 34 | 		pooling_backward_tbmodule	(
 35 | 												.reset_n			(reset),	//reset
 36 | 												.clk				(clk),		//clock
 37 | 												.max_flt_idx	(in_idx),
 38 | 												.data_vect_in	(in_vec),	//Vector data input
 39 | 												.error_term		(in_err_term),
 40 | 
 41 | 												.data_vect_out	(out_data)	//Vector data output
 42 | 											);
 43 | 
 44 | 	initial begin
 45 | 		reset = 0;	//held low for the whole run: pooling_backward_opt drives the multiplier clk_en with !reset_n
 46 | 		num_errors = 0;
 47 | 
 48 | 		//calculate total delay of one calculation
 49 | 		//one multiplication, plus one cycle to load operand, one to load result
 50 | 		delay = CYCLE*(MULT_DELAY + 3); 
 51 | 
 52 | 		//for all test cases
 53 | 		 for (i = 0; i < MEM_SIZE; i = i + WIDTH) begin
 54 | 			//copy each value to input vector
 55 | 			for (j = 0; j < WIDTH; j++) begin
 56 | 				in_vec[j] = test_input[i+j];
 57 | 			end
 58 | 
 59 | 			in_idx = test_index[i/WIDTH];
 60 | 			in_err_term = test_error_term[i/WIDTH];
 61 | 
 62 | 			//wait for computation to finish
 63 | 			#(delay)
 64 | 			
 65 | 			$display("test case: %d\t", i/WIDTH);
 66 | 			$display("test idx: %d\t", in_idx);
 67 | 			$display("err_term: %h\t", in_err_term);
 68 | 			`ifdef DEBUG
 69 | 				$display("in_vec\t test_input");
 70 | 				for (j = 0; j < WIDTH; j++) begin
 71 | 					$display("%h\t%h", in_vec[j], test_input[i+j]);
 72 | 				end
 73 | 				$display("out_data\t test_output\t");
 74 | 				for (j = 0; j < WIDTH; j++) begin
 75 | 					$display("%h\t%h", out_data[j], test_output[i+j]);
 76 | 				end
 77 | 			`endif
 78 | 
 79 | 			//only the entry selected by in_idx is compared against the reference
 80 | 			if( out_data[in_idx] != test_output[i+in_idx]) begin
 81 | 				//if the number was off because of a rounding error, ignore
 82 | 				if ( out_data[in_idx] - test_output[i+in_idx] < 32'h000000ff || 
 83 | 						test_output[i+in_idx] - out_data[in_idx] < 32'h000000ff ) begin
 84 | 					`ifdef DEBUG
 85 | 						$display("rounding error");
 86 | 					`endif
 87 | 				//otherwise, complain 
 88 | 				end else begin
 89 | 					//compare the selected word, not the whole unpacked array, with the reference word
 90 | 					assert( out_data[in_idx] == test_output[i+in_idx] );
 91 | 					$display("output: %h\tcalculated: %h", out_data[in_idx], test_output[i+in_idx]);
 92 | 					num_errors++;
 93 | 				end
 94 | 			end
 95 | 
 96 | 			$display("\n\n");
 97 | 		end
 98 | 		$display("############################################\n");
 99 | 		$display("Testing complete!\n");
100 | 		$display("%d of %d tests passed\n", NUM_TESTS-num_errors, NUM_TESTS);
101 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
102 | 		$display("############################################\n");
103 | 		$finish;	//end the run; the free-running clk would otherwise keep the simulation alive
104 | 
105 | 	end endmodule
106 | 


--------------------------------------------------------------------------------
/rtl/pooling_backward_opt.sv:
--------------------------------------------------------------------------------
 1 | /* Author: Youthawin Philavastvanid 
 2 |  * Date  : 02/08/2016 
 3 |  * 
 4 |  * Module: pooling_backward_opt
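 5 |  * Desc  : Copies data_vect_in to data_vect_out on each clock; the entry at max_flt_idx
 6 |  *         is instead replaced by the float_mult product of that entry and error_term.
 7 |  * Design:  
 8 |  * Input : Takes in a 1D vector containing all the floating point values
 9 |  * Output: The same vector with the max_flt_idx entry multiplied by error_term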
10 |  *
11 |  * WARNING: Max number the module can handle is 32  floating point
12 | */
13 | 
14 | module pooling_backward_opt#(
15 | 	parameter 			
16 | 					k_w=3,		//kernel width
17 | 					k_h=3,		//kernel height
18 | 					k_size= k_w*k_h)(//kernel size
19 | 	//Registers data_vect_in into data_vect_out every clock, except that the
20 | 	//entry selected by max_flt_idx is replaced by the float_mult result.
21 | 	input logic 				reset_n,			//inverted onto the multiplier clk_en
22 | 	input logic					clk,				//clock
23 | 	input logic	[7:0]			max_flt_idx,	//idx of max float in a kernel	
24 | 	input logic	[31:0]		data_vect_in[k_size-1:0],	//data input
25 | 	input logic	[31:0]		error_term,		//error term for a kernel
26 | 
27 | 	output reg  [31:0]	data_vect_out[k_size-1:0]	//Vector data output
28 | ); 
29 | 
30 | reg [31:0]  max;		//registered data_vect_in[max_flt_idx], operand A of the multiplier
31 | reg [31:0] result;	//multiplier output
32 | 
33 | //result = max * error_term
34 | //NOTE(review): clk_en is !reset_n, so presumably the multiplier only advances while reset_n is low - confirm polarity
35 | float_mult float_mult_inst	(
36 | 										.clock(clk),
37 | 										.clk_en(!reset_n),
38 | 										.dataa(max),
39 | 										.datab(error_term),
40 | 										.result(result)	
41 | 									);
42 | 	always_ff @(posedge clk) begin
43 | 		for (int k = 0; k < k_size; k++) begin : for_row_itr
44 | 			if (k != max_flt_idx)
45 | 				data_vect_out[k] <= data_vect_in[k];		//pass-through entry
46 | 			else begin
47 | 				max <= data_vect_in[max_flt_idx];			//capture the operand for the multiplier
48 | 				data_vect_out[max_flt_idx] <= result;		//value currently at the multiplier output
49 | 			end
50 | 		end
51 | 	end
52 | 
53 | endmodule
54 | 


--------------------------------------------------------------------------------
/rtl/qa_conv.sv:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | module qa_conv(
  4 | 					input logic clk, 			//single clock for the FSM, buffers and conv blocks
  5 | 					input logic resetb,			//active-low synchronous reset of the FSM state
  6 | 					input logic ready,			//releases FINISH back to IDLE
  7 | 					input logic start,			//moves IDLE to READ
  8 | 					input logic buffer_select,	//0: write the input-data buffer, 1: write the weight buffer
  9 | 					input logic [7:0] wr_addr,	//write address shared by both buffers
 10 | 					input logic [511:0] data,	//write data shared by both buffers
 11 | 					output logic [511:0] result	//read data of the result buffer
 12 | 					);
 13 | 	
 14 | 	parameter NUM_BLOCKS = 8;		//conv_forward_layer instances, each fed one 64-bit slice
 15 | 	parameter NUM_CYCLES = 5 + 7; //5 cycles for mult, 7 for add
 16 | 	parameter MIN_CL_COUNT = 12; //randomly selected for the moment
 17 | 	parameter BUFFER_DEPTH = 256;
 18 | 	
 19 | 	typedef enum logic [2:0] {IDLE, READ, RUN, FINISH} state_t;
 20 | 	
 21 | 	state_t state;
 22 | 	state_t next_state;
 23 | 	logic [7:0] cl_count;			//cycles counted in READ with the input-data buffer selected
 24 | 	logic [7:0] rd_addr;			//buffer entry currently being processed
 25 | 	logic [4:0] cycle_count;		//clocks spent on the current entry
 26 | 	logic wr_en_input_data;			//write enable of input_data_buffer (was an implicit net)
 27 | 	logic wr_en_weight_data;		//write enable of weight_data_buffer (was an implicit net)
 28 | 	
 29 | 	// FSM state update
 30 | 	always_ff @(posedge clk) begin
 31 | 		if (!resetb) begin
 32 | 			state <= IDLE;
 33 | 		end else begin
 34 | 			state <= next_state;
 35 | 		end
 36 | 	end
 37 | 	
 38 | 	// FSM next-state decode
 39 | 	always_comb begin
 40 | 		case (state)
 41 | 			IDLE:
 42 | 				next_state = start ? READ : IDLE;
 43 | 			READ: //read until we have at least MIN_CL_COUNT cachelines in buff
 44 | 				next_state = cl_count < MIN_CL_COUNT ? READ : RUN;
 45 | 			RUN: //NOTE(review): leaves RUN as soon as rd_addr reaches the last entry, before its NUM_CYCLES elapse
 46 | 				next_state = rd_addr == BUFFER_DEPTH-1 ? FINISH : RUN;
 47 | 			FINISH:
 48 | 				next_state = ready ? IDLE : FINISH;
 49 | 			default:
 50 | 				next_state = state;
 51 | 		endcase
 52 | 	end
 53 | 	
 54 | 	//Counters. All three are written from this one process: a variable may be
 55 | 	//driven by only a single always_ff, and the states are mutually exclusive.
 56 | 	always_ff @(posedge clk) begin
 57 | 		case (state)
 58 | 			IDLE: begin //in idle we reset the counter variables
 59 | 				cl_count <= 0;
 60 | 				rd_addr <= 0;
 61 | 				cycle_count <= 0;
 62 | 			end
 63 | 			READ: begin //if starting, fill data and weight buffers
 64 | 				if (!buffer_select) begin
 65 | 					cl_count <= cl_count + 1;
 66 | 				end
 67 | 			end
 68 | 			RUN: begin //after we have completed a computation, increment addr
 69 | 				if (cycle_count > NUM_CYCLES) begin
 70 | 					rd_addr <= rd_addr + 1;
 71 | 					cycle_count <= 0;
 72 | 				end else begin
 73 | 					cycle_count <= cycle_count + 1;
 74 | 				end
 75 | 			end
 76 | 			default: begin //FINISH: every counter holds its value
 77 | 			end
 78 | 		endcase
 79 | 	end
 80 | 	
 81 | 	//select buffer to write to 
 82 | 	assign wr_en_input_data = !buffer_select;
 83 | 	assign wr_en_weight_data = buffer_select;
 84 | 	
 85 | 	logic [255:0] conv_out;
 86 | 	logic [511:0] data_buffer_out;
 87 | 	logic [511:0] weight_buffer_out;
 88 | 	
 89 | 	cacheline_buffer input_data_buffer(
 90 | 											.wr_clk(clk),
 91 | 											.wr_en(wr_en_input_data),
 92 | 											.wr_addr(wr_addr),
 93 | 											.wr_data(data),
 94 | 											.rd_clk(clk),
 95 | 											.rd_addr(rd_addr),
 96 | 											.rd_data(data_buffer_out)
 97 | 										);
 98 | 										
 99 | 	cacheline_buffer weight_data_buffer(
100 | 											.wr_clk(clk),
101 | 											.wr_en(wr_en_weight_data),
102 | 											.wr_addr(wr_addr),
103 | 											.wr_data(data),
104 | 											.rd_clk(clk),
105 | 											.rd_addr(rd_addr),
106 | 											.rd_data(weight_buffer_out)
107 | 										);										
108 | 	//NOTE(review): rdaddress is left open and conv_out is 256 bits while result is 512 - confirm ram_2p widths
109 | 	ram_2p result_data_buffer(
110 | 											.wrclock(clk),
111 | 											.wren(1'b1),	//was tied to clk; a clock must not be sampled as data. NOTE(review): gate with a result-valid strobe if one is intended
112 | 											.wraddress(rd_addr),
113 | 											.data(conv_out),
114 | 											.rdclock(clk),
115 | 											.rdaddress(),
116 | 											.q(result)
117 | 										);	
118 | 	//each block gets one 64-bit slice of the buffered data and weights (replaces the undeclared conv_bus reference)
119 | 	genvar i;
120 | 	generate 
121 | 		for (i = 0; i < NUM_BLOCKS; i++) begin : GEN_CONV
122 | 			conv_forward_layer #(.WIDTH(2))
123 | 				conv_forward_inst(
124 | 											.clk(clk),
125 | 											.reset(resetb),
126 | 											.id(rd_addr),
127 | 											.in_data(data_buffer_out[(i+1)*64-1:i*64]),
128 | 											.weight_vec(weight_buffer_out[(i+1)*64-1:i*64]),
129 | //											.bias_term(conv_bus.bias[(i+1)*32-1:i*32]),
130 | 											.out_data(conv_out[(i+1)*32-1:i*32])
131 | 										);
132 | 		end
133 | 	endgenerate
134 | 	//NOTE(review): conv_forward_layer is defined in another file; its port names and types, and the
135 | 	//polarity expected on .reset (fed the active-low resetb here), were not checked - confirm
136 | 	
137 | endmodule
138 | 						


--------------------------------------------------------------------------------
/rtl/qip/float_add.bsf:
--------------------------------------------------------------------------------
 1 | /*
 2 | WARNING: Do NOT edit the input and output ports in this file in a text
 3 | editor if you plan to continue editing the block that represents it in
 4 | the Block Editor! File corruption is VERY likely to occur.
 5 | */
 6 | /*
 7 | Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 8 | Your use of Altera Corporation's design tools, logic functions 
 9 | and other software and tools, and its AMPP partner logic 
10 | functions, and any output files from any of the foregoing 
11 | (including device programming or simulation files), and any 
12 | associated documentation or information are expressly subject 
13 | to the terms and conditions of the Altera Program License 
14 | Subscription Agreement, the Altera Quartus Prime License Agreement,
15 | the Altera MegaCore Function License Agreement, or other 
16 | applicable license agreement, including, without limitation, 
17 | that your use is for the sole purpose of programming logic 
18 | devices manufactured by Altera and sold by Altera or its 
19 | authorized distributors.  Please refer to the applicable 
20 | agreement for further details.
21 | */
22 | (header "symbol" (version "1.2"))
23 | (symbol
24 | 	(rect 0 0 184 248)
25 | 	(text "float_add" (rect 66 0 182 18)(font "Arial" (font_size 10)))
26 | 	(text "inst" (rect 8 232 45 244)(font "Arial" ))
27 | 	(port
28 | 		(pt 0 40)
29 | 		(input)
30 | 		(text "dataa[31..0]" (rect 0 0 127 15)(font "Arial" (font_size 8)))
31 | 		(text "dataa[31..0]" (rect 20 34 78 46)(font "Arial" (font_size 8)))
32 | 		(line (pt 0 40)(pt 16 40)(line_width 3))
33 | 	)
34 | 	(port
35 | 		(pt 0 56)
36 | 		(input)
37 | 		(text "datab[31..0]" (rect 0 0 127 15)(font "Arial" (font_size 8)))
38 | 		(text "datab[31..0]" (rect 20 50 78 62)(font "Arial" (font_size 8)))
39 | 		(line (pt 0 56)(pt 16 56)(line_width 3))
40 | 	)
41 | 	(port
42 | 		(pt 0 88)
43 | 		(input)
44 | 		(text "clock" (rect 0 0 53 15)(font "Arial" (font_size 8)))
45 | 		(text "clock" (rect 20 82 45 94)(font "Arial" (font_size 8)))
46 | 		(line (pt 0 88)(pt 16 88))
47 | 	)
48 | 	(port
49 | 		(pt 0 104)
50 | 		(input)
51 | 		(text "aclr" (rect 0 0 42 15)(font "Arial" (font_size 8)))
52 | 		(text "aclr" (rect 20 98 38 110)(font "Arial" (font_size 8)))
53 | 		(line (pt 0 104)(pt 16 104))
54 | 	)
55 | 	(port
56 | 		(pt 184 40)
57 | 		(output)
58 | 		(text "result[31..0]" (rect 0 0 138 15)(font "Arial" (font_size 8)))
59 | 		(text "result[31..0]" (rect 109 34 166 46)(font "Arial" (font_size 8)))
60 | 		(line (pt 184 40)(pt 168 40)(line_width 3))
61 | 	)
62 | 	(drawing
63 | 		(text "Clock Cycles: 7" (rect 20 115 108 240)(font "Arial" ))
64 | 		(text "Single Precision" (rect 20 131 107 272)(font "Arial" ))
65 | 		(text "Exponent Width: 8" (rect 20 147 118 304)(font "Arial" ))
66 | 		(text "Mantissa Width: 23" (rect 20 163 122 336)(font "Arial" ))
67 | 		(text "Direction: Add" (rect 20 179 100 368)(font "Arial" ))
68 | 		(text "Optimization: Speed" (rect 20 195 125 400)(font "Arial" ))
69 | 		(line (pt 0 0)(pt 186 0))
70 | 		(line (pt 186 0)(pt 186 250))
71 | 		(line (pt 0 250)(pt 186 250))
72 | 		(line (pt 0 0)(pt 0 250))
73 | 		(line (pt 16 24)(pt 170 24))
74 | 		(line (pt 170 24)(pt 170 226))
75 | 		(line (pt 16 226)(pt 170 226))
76 | 		(line (pt 16 24)(pt 16 226))
77 | 	)
78 | )
79 | 


--------------------------------------------------------------------------------
/rtl/qip/float_add.cmp:
--------------------------------------------------------------------------------
 1 | --Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 2 | --Your use of Altera Corporation's design tools, logic functions 
 3 | --and other software and tools, and its AMPP partner logic 
 4 | --functions, and any output files from any of the foregoing 
 5 | --(including device programming or simulation files), and any 
 6 | --associated documentation or information are expressly subject 
 7 | --to the terms and conditions of the Altera Program License 
 8 | --Subscription Agreement, the Altera Quartus Prime License Agreement,
 9 | --the Altera MegaCore Function License Agreement, or other 
10 | --applicable license agreement, including, without limitation, 
11 | --that your use is for the sole purpose of programming logic 
12 | --devices manufactured by Altera and sold by Altera or its 
13 | --authorized distributors.  Please refer to the applicable 
14 | --agreement for further details.
15 | 
16 | 
17 | component float_add	-- VHDL component declaration for the float_add IP (ports mirror float_add_bb.v)
18 | 	PORT
19 | 	(
20 | 		aclr		: IN STD_LOGIC ;	-- clear input (presumably asynchronous, per the name - confirm in the IP guide)
21 | 		clock		: IN STD_LOGIC ;	-- clock input
22 | 		dataa		: IN STD_LOGIC_VECTOR (31 DOWNTO 0);	-- first 32-bit operand
23 | 		datab		: IN STD_LOGIC_VECTOR (31 DOWNTO 0);	-- second 32-bit operand
24 | 		result		: OUT STD_LOGIC_VECTOR (31 DOWNTO 0)	-- 32-bit result
25 | 	);
26 | end component;
27 | 


--------------------------------------------------------------------------------
/rtl/qip/float_add.inc:
--------------------------------------------------------------------------------
 1 | --Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 2 | --Your use of Altera Corporation's design tools, logic functions 
 3 | --and other software and tools, and its AMPP partner logic 
 4 | --functions, and any output files from any of the foregoing 
 5 | --(including device programming or simulation files), and any 
 6 | --associated documentation or information are expressly subject 
 7 | --to the terms and conditions of the Altera Program License 
 8 | --Subscription Agreement, the Altera Quartus Prime License Agreement,
 9 | --the Altera MegaCore Function License Agreement, or other 
10 | --applicable license agreement, including, without limitation, 
11 | --that your use is for the sole purpose of programming logic 
12 | --devices manufactured by Altera and sold by Altera or its 
13 | --authorized distributors.  Please refer to the applicable 
14 | --agreement for further details.
15 | 
16 | 
17 | FUNCTION float_add 	-- function prototype of the float_add IP: four inputs, one 32-bit return
18 | (
19 | 	aclr,	-- clear input
20 | 	clock,	-- clock input
21 | 	dataa[31..0],	-- first 32-bit operand
22 | 	datab[31..0]	-- second 32-bit operand
23 | )
24 | 
25 | RETURNS (
26 | 	result[31..0]	-- 32-bit result
27 | );
28 | 


--------------------------------------------------------------------------------
/rtl/qip/float_add.qip:
--------------------------------------------------------------------------------
 1 | set_global_assignment -name IP_TOOL_NAME "ALTFP_ADD_SUB"
 2 | set_global_assignment -name IP_TOOL_VERSION "15.1"
 3 | set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Stratix V}"
 4 | set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "float_add.v"]
 5 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add.bsf"]
 6 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add_inst.v"]
 7 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add_bb.v"]
 8 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add.inc"]
 9 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add.cmp"]
10 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_add_syn.v"]
11 | 


--------------------------------------------------------------------------------
/rtl/qip/float_add_bb.v:
--------------------------------------------------------------------------------
 1 | // megafunction wizard: %ALTFP_ADD_SUB%VBB%
 2 | // GENERATION: STANDARD
 3 | // VERSION: WM1.0
 4 | // MODULE: altfp_add_sub 
 5 | 
 6 | // ============================================================
 7 | // File Name: float_add.v
 8 | // Megafunction Name(s):
 9 | // 			altfp_add_sub
10 | //
11 | // Simulation Library Files(s):
12 | // 			lpm
13 | // ============================================================
14 | // ************************************************************
15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
16 | //
17 | // 15.1.1 Build 189 12/02/2015 SJ Standard Edition
18 | // ************************************************************
19 | 
20 | //Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
21 | //Your use of Altera Corporation's design tools, logic functions 
22 | //and other software and tools, and its AMPP partner logic 
23 | //functions, and any output files from any of the foregoing 
24 | //(including device programming or simulation files), and any 
25 | //associated documentation or information are expressly subject 
26 | //to the terms and conditions of the Altera Program License 
27 | //Subscription Agreement, the Altera Quartus Prime License Agreement,
28 | //the Altera MegaCore Function License Agreement, or other 
29 | //applicable license agreement, including, without limitation, 
30 | //that your use is for the sole purpose of programming logic 
31 | //devices manufactured by Altera and sold by Altera or its 
32 | //authorized distributors.  Please refer to the applicable 
33 | //agreement for further details.
34 | 
35 | module float_add (	// Black-box declaration of float_add: ports only, no logic between the declarations and endmodule.
36 | 	aclr,
37 | 	clock,
38 | 	dataa,
39 | 	datab,
40 | 	result)/* synthesis synthesis_clearbox = 1 */;
41 | 
42 | 	input	  aclr;	// 1-bit input; NOTE(review): presumably the asynchronous clear of altfp_add_sub - confirm against the IP documentation
43 | 	input	  clock;	// 1-bit input; the retrieval info in this file lists PIPELINE "7" for this core
44 | 	input	[31:0]  dataa;	// 32-bit input; retrieval info lists WIDTH_EXP 8 and WIDTH_MAN 23 (plus presumably 1 sign bit - confirm)
45 | 	input	[31:0]  datab;	// 32-bit input, same width as dataa; retrieval info lists DIRECTION "ADD"
46 | 	output	[31:0]  result;	// 32-bit output, same width as the two data inputs
47 | 
48 | endmodule
49 | 
50 | // ============================================================
51 | // CNX file retrieval info
52 | // ============================================================
53 | // Retrieval info: PRIVATE: FPM_FORMAT NUMERIC "0"
54 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
55 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "1"
56 | // Retrieval info: PRIVATE: WIDTH_DATA NUMERIC "32"
57 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
58 | // Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
59 | // Retrieval info: CONSTANT: DIRECTION STRING "ADD"
60 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V"
61 | // Retrieval info: CONSTANT: OPTIMIZE STRING "SPEED"
62 | // Retrieval info: CONSTANT: PIPELINE NUMERIC "7"
63 | // Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
64 | // Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
65 | // Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
66 | // Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT NODEFVAL "aclr"
67 | // Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
68 | // Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
69 | // Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
70 | // Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
71 | // Retrieval info: CONNECT: @aclr 0 0 0 0 aclr 0 0 0 0
72 | // Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
73 | // Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
74 | // Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
75 | // Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
76 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add.v TRUE
77 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add.inc TRUE
78 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add.cmp TRUE
79 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add.bsf TRUE
80 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add_inst.v TRUE
81 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add_bb.v TRUE
82 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_add_syn.v TRUE
83 | // Retrieval info: LIB_FILE: lpm
84 | 


--------------------------------------------------------------------------------
/rtl/qip/float_add_inst.v:
--------------------------------------------------------------------------------
1 | float_add	float_add_inst (	// Instantiates float_add as float_add_inst using named port connections.
2 | 	.aclr ( aclr_sig ),	// the *_sig nets are not declared in this snippet; NOTE(review): presumably placeholders to replace with nets of the enclosing module
3 | 	.clock ( clock_sig ),
4 | 	.dataa ( dataa_sig ),
5 | 	.datab ( datab_sig ),
6 | 	.result ( result_sig )	// the only connection whose port is declared as an output in float_add_bb.v
7 | 	);
8 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult.bsf:
--------------------------------------------------------------------------------
 1 | /*
 2 | WARNING: Do NOT edit the input and output ports in this file in a text
 3 | editor if you plan to continue editing the block that represents it in
 4 | the Block Editor! File corruption is VERY likely to occur.
 5 | */
 6 | /*
 7 | Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 8 | Your use of Altera Corporation's design tools, logic functions 
 9 | and other software and tools, and its AMPP partner logic 
10 | functions, and any output files from any of the foregoing 
11 | (including device programming or simulation files), and any 
12 | associated documentation or information are expressly subject 
13 | to the terms and conditions of the Altera Program License 
14 | Subscription Agreement, the Altera Quartus Prime License Agreement,
15 | the Altera MegaCore Function License Agreement, or other 
16 | applicable license agreement, including, without limitation, 
17 | that your use is for the sole purpose of programming logic 
18 | devices manufactured by Altera and sold by Altera or its 
19 | authorized distributors.  Please refer to the applicable 
20 | agreement for further details.
21 | */
22 | (header "symbol" (version "1.2"))
23 | (symbol
24 | 	(rect 0 0 224 176)
25 | 	(text "float_mult" (rect 80 0 209 20)(font "Dialog" (font_size 10)))
26 | 	(text "inst" (rect 8 160 45 172)(font "Arial" ))
27 | 	(port
28 | 		(pt 0 48)
29 | 		(input)
30 | 		(text "dataa[31..0]" (rect 0 0 127 15)(font "Dialog" (font_size 8)))
31 | 		(text "dataa[31..0]" (rect 4 35 72 47)(font "Dialog" (font_size 8)))
32 | 		(line (pt 0 48)(pt 80 48)(line_width 3))
33 | 	)
34 | 	(port
35 | 		(pt 0 64)
36 | 		(input)
37 | 		(text "datab[31..0]" (rect 0 0 127 15)(font "Dialog" (font_size 8)))
38 | 		(text "datab[31..0]" (rect 4 51 72 63)(font "Dialog" (font_size 8)))
39 | 		(line (pt 0 64)(pt 80 64)(line_width 3))
40 | 	)
41 | 	(port
42 | 		(pt 0 80)
43 | 		(input)
44 | 		(text "clk_en" (rect 0 0 63 15)(font "Dialog" (font_size 8)))
45 | 		(text "clk_en" (rect 4 67 38 79)(font "Dialog" (font_size 8)))
46 | 		(line (pt 0 80)(pt 80 80))
47 | 	)
48 | 	(port
49 | 		(pt 0 96)
50 | 		(input)
51 | 		(text "clock" (rect 0 0 53 15)(font "Dialog" (font_size 8)))
52 | 		(text "clock" (rect 4 83 34 95)(font "Dialog" (font_size 8)))
53 | 		(line (pt 0 96)(pt 80 96))
54 | 	)
55 | 	(port
56 | 		(pt 224 48)
57 | 		(output)
58 | 		(text "result[31..0]" (rect 0 0 138 15)(font "Dialog" (font_size 8)))
59 | 		(text "result[31..0]" (rect 151 35 218 47)(font "Dialog" (font_size 8)))
60 | 		(line (pt 224 48)(pt 144 48)(line_width 3))
61 | 	)
62 | 	(drawing
63 | 		(text "Clock cycles: 5" (rect 151 114 368 238)(font "Arial" ))
64 | 		(text "Single Precision" (rect 145 130 357 270)(font "Arial" ))
65 | 		(text "Exponent Width: 8" (rect 138 146 354 302)(font "Arial" ))
66 | 		(text "Mantissa Width: 23" (rect 134 162 350 334)(font "Arial" ))
67 | 		(line (pt 80 32)(pt 144 32))
68 | 		(line (pt 144 32)(pt 144 112))
69 | 		(line (pt 80 112)(pt 144 112))
70 | 		(line (pt 80 32)(pt 80 112))
71 | 		(line (pt 0 0)(pt 224 0))
72 | 		(line (pt 224 0)(pt 224 176))
73 | 		(line (pt 0 176)(pt 224 176))
74 | 		(line (pt 0 0)(pt 0 176))
75 | 	)
76 | )
77 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult.cmp:
--------------------------------------------------------------------------------
 1 | --Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 2 | --Your use of Altera Corporation's design tools, logic functions 
 3 | --and other software and tools, and its AMPP partner logic 
 4 | --functions, and any output files from any of the foregoing 
 5 | --(including device programming or simulation files), and any 
 6 | --associated documentation or information are expressly subject 
 7 | --to the terms and conditions of the Altera Program License 
 8 | --Subscription Agreement, the Altera Quartus Prime License Agreement,
 9 | --the Altera MegaCore Function License Agreement, or other 
10 | --applicable license agreement, including, without limitation, 
11 | --that your use is for the sole purpose of programming logic 
12 | --devices manufactured by Altera and sold by Altera or its 
13 | --authorized distributors.  Please refer to the applicable 
14 | --agreement for further details.
15 | 
16 | 
17 | component float_mult	-- Component declaration for float_mult: four inputs and one 32-bit output.
18 | 	PORT
19 | 	(
20 | 		clk_en		: IN STD_LOGIC ;	-- single-bit input; NOTE(review): presumably the clock enable - confirm against the ALTFP_MULT documentation
21 | 		clock		: IN STD_LOGIC ;	-- single-bit input
22 | 		dataa		: IN STD_LOGIC_VECTOR (31 DOWNTO 0);	-- 32-bit input vector
23 | 		datab		: IN STD_LOGIC_VECTOR (31 DOWNTO 0);	-- 32-bit input vector, same width as dataa
24 | 		result		: OUT STD_LOGIC_VECTOR (31 DOWNTO 0)	-- 32-bit output vector
25 | 	);
26 | end component;
27 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult.inc:
--------------------------------------------------------------------------------
 1 | --Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 2 | --Your use of Altera Corporation's design tools, logic functions 
 3 | --and other software and tools, and its AMPP partner logic 
 4 | --functions, and any output files from any of the foregoing 
 5 | --(including device programming or simulation files), and any 
 6 | --associated documentation or information are expressly subject 
 7 | --to the terms and conditions of the Altera Program License 
 8 | --Subscription Agreement, the Altera Quartus Prime License Agreement,
 9 | --the Altera MegaCore Function License Agreement, or other 
10 | --applicable license agreement, including, without limitation, 
11 | --that your use is for the sole purpose of programming logic 
12 | --devices manufactured by Altera and sold by Altera or its 
13 | --authorized distributors.  Please refer to the applicable 
14 | --agreement for further details.
15 | 
16 | 
17 | FUNCTION float_mult 	-- AHDL function prototype for float_mult: lists the inputs, then the returned outputs.
18 | (
19 | 	clk_en,	-- single-node input
20 | 	clock,	-- single-node input
21 | 	dataa[31..0],	-- 32-node input group
22 | 	datab[31..0]	-- 32-node input group, same width as dataa
23 | )
24 | 
25 | RETURNS (
26 | 	result[31..0]	-- 32-node output group; the only value this prototype returns
27 | );
28 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult.qip:
--------------------------------------------------------------------------------
 1 | set_global_assignment -name IP_TOOL_NAME "ALTFP_MULT"
 2 | set_global_assignment -name IP_TOOL_VERSION "15.1"
 3 | set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Stratix V}"
 4 | set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "float_mult.v"]
 5 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult.bsf"]
 6 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult_inst.v"]
 7 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult_bb.v"]
 8 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult.inc"]
 9 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult.cmp"]
10 | set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "float_mult_syn.v"]
11 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult.v:
--------------------------------------------------------------------------------
  1 | // megafunction wizard: %ALTFP_MULT%
  2 | // GENERATION: STANDARD
  3 | // VERSION: WM1.0
  4 | // MODULE: ALTFP_MULT 
  5 | 
  6 | // ============================================================
  7 | // File Name: float_mult.v
  8 | // Megafunction Name(s):
  9 | // 			ALTFP_MULT
 10 | //
 11 | // Simulation Library File(s):
 12 | // 			lpm
 13 | // ============================================================
 14 | // ************************************************************
 15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
 16 | //
 17 | // 15.1.1 Build 189 12/02/2015 SJ Standard Edition
 18 | // ************************************************************
 19 | 
 20 | 
 21 | //Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 22 | //Your use of Altera Corporation's design tools, logic functions 
 23 | //and other software and tools, and its AMPP partner logic 
 24 | //functions, and any output files from any of the foregoing 
 25 | //(including device programming or simulation files), and any 
 26 | //associated documentation or information are expressly subject 
 27 | //to the terms and conditions of the Altera Program License 
 28 | //Subscription Agreement, the Altera Quartus Prime License Agreement,
 29 | //the Altera MegaCore Function License Agreement, or other 
 30 | //applicable license agreement, including, without limitation, 
 31 | //that your use is for the sole purpose of programming logic 
 32 | //devices manufactured by Altera and sold by Altera or its 
 33 | //authorized distributors.  Please refer to the applicable 
 34 | //agreement for further details.
 35 | 
 36 | 
 37 | //altfp_mult CBX_AUTO_BLACKBOX="ALL" DEDICATED_MULTIPLIER_CIRCUITRY="YES" DENORMAL_SUPPORT="NO" DEVICE_FAMILY="Stratix V" EXCEPTION_HANDLING="NO" PIPELINE=5 REDUCED_FUNCTIONALITY="NO" ROUNDING="TO_NEAREST" WIDTH_EXP=8 WIDTH_MAN=23 clk_en clock dataa datab result
 38 | //VERSION_BEGIN 15.1 cbx_alt_ded_mult_y 2015:11:24:18:49:55:SJ cbx_altbarrel_shift 2015:11:24:18:49:55:SJ cbx_altera_mult_add 2015:11:24:18:49:55:SJ cbx_altera_mult_add_rtl 2015:11:24:18:49:55:SJ cbx_altfp_mult 2015:11:24:18:49:55:SJ cbx_altmult_add 2015:11:24:18:49:55:SJ cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_lpm_compare 2015:11:24:18:49:55:SJ cbx_lpm_mult 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_padd 2015:11:24:18:49:55:SJ cbx_parallel_add 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ cbx_util_mgl 2015:11:24:18:49:55:SJ  VERSION_END
 39 | // synthesis VERILOG_INPUT_VERSION VERILOG_2001
 40 | // altera message_off 10463
 41 | 
 42 | 
 43 | //synthesis_resources = lpm_add_sub 4 lpm_mult 1 reg 136 
 44 | //synopsys translate_off
 45 | `timescale 1 ps / 1 ps
 46 | //synopsys translate_on
 47 | module  float_mult_altfp_mult_t9o
 48 | 	( 
 49 | 	clk_en,
 50 | 	clock,
 51 | 	dataa,
 52 | 	datab,
 53 | 	result) ;
 54 | 	input   clk_en;
 55 | 	input   clock;
 56 | 	input   [31:0]  dataa;
 57 | 	input   [31:0]  datab;
 58 | 	output   [31:0]  result;
 59 | `ifndef ALTERA_RESERVED_QIS
 60 | // synopsys translate_off
 61 | `endif
 62 | 	tri1   clk_en;
 63 | `ifndef ALTERA_RESERVED_QIS
 64 | // synopsys translate_on
 65 | `endif
 66 | 
 67 | 	reg	dataa_exp_all_one_ff_p1;
 68 | 	reg	dataa_exp_not_zero_ff_p1;
 69 | 	reg	dataa_man_not_zero_ff_p1;
 70 | 	reg	dataa_man_not_zero_ff_p2;
 71 | 	reg	datab_exp_all_one_ff_p1;
 72 | 	reg	datab_exp_not_zero_ff_p1;
 73 | 	reg	datab_man_not_zero_ff_p1;
 74 | 	reg	datab_man_not_zero_ff_p2;
 75 | 	reg	[9:0]	delay_exp2_bias;
 76 | 	reg	[9:0]	delay_exp_bias;
 77 | 	reg	delay_man_product_msb;
 78 | 	reg	delay_man_product_msb_p0;
 79 | 	reg	[8:0]	exp_add_p1;
 80 | 	reg	[7:0]	exp_result_ff;
 81 | 	reg	input_is_infinity_dffe_0;
 82 | 	reg	input_is_infinity_dffe_1;
 83 | 	reg	input_is_infinity_ff1;
 84 | 	reg	input_is_nan_dffe_0;
 85 | 	reg	input_is_nan_dffe_1;
 86 | 	reg	input_is_nan_ff1;
 87 | 	reg	input_not_zero_dffe_0;
 88 | 	reg	input_not_zero_dffe_1;
 89 | 	reg	input_not_zero_ff1;
 90 | 	reg	lsb_dffe;
 91 | 	reg	[22:0]	man_result_ff;
 92 | 	reg	[23:0]	man_round_p;
 93 | 	reg	[24:0]	man_round_p2;
 94 | 	reg	round_dffe;
 95 | 	reg	[0:0]	sign_node_ff0;
 96 | 	reg	[0:0]	sign_node_ff1;
 97 | 	reg	[0:0]	sign_node_ff2;
 98 | 	reg	[0:0]	sign_node_ff3;
 99 | 	reg	[0:0]	sign_node_ff4;
100 | 	reg	sticky_dffe;
101 | 	wire  [8:0]   wire_exp_add_adder_result;
102 | 	wire  [9:0]   wire_exp_adj_adder_result;
103 | 	wire  [9:0]   wire_exp_bias_subtr_result;
104 | 	wire  [24:0]   wire_man_round_adder_result;
105 | 	wire  [47:0]   wire_man_product2_mult_result;
106 | 	wire aclr;
107 | 	wire  [9:0]  bias;
108 | 	wire  [7:0]  dataa_exp_all_one;
109 | 	wire  [7:0]  dataa_exp_not_zero;
110 | 	wire  [22:0]  dataa_man_not_zero;
111 | 	wire  [7:0]  datab_exp_all_one;
112 | 	wire  [7:0]  datab_exp_not_zero;
113 | 	wire  [22:0]  datab_man_not_zero;
114 | 	wire  exp_is_inf;
115 | 	wire  exp_is_zero;
116 | 	wire  [9:0]  expmod;
117 | 	wire  [7:0]  inf_num;
118 | 	wire  lsb_bit;
119 | 	wire  [23:0]  man_result_round;
120 | 	wire  [24:0]  man_shift_full;
121 | 	wire  [7:0]  result_exp_all_one;
122 | 	wire  [8:0]  result_exp_not_zero;
123 | 	wire  round_bit;
124 | 	wire  round_carry;
125 | 	wire  [22:0]  sticky_bit;
126 | 
127 | 	// synopsys translate_off
128 | 	initial
129 | 		dataa_exp_all_one_ff_p1 = 0;
130 | 	// synopsys translate_on
131 | 	always @ ( posedge clock or  posedge aclr)
132 | 		if (aclr == 1'b1) dataa_exp_all_one_ff_p1 <= 1'b0;
133 | 		else if  (clk_en == 1'b1)   dataa_exp_all_one_ff_p1 <= dataa_exp_all_one[7];
134 | 	// synopsys translate_off
135 | 	initial
136 | 		dataa_exp_not_zero_ff_p1 = 0;
137 | 	// synopsys translate_on
138 | 	always @ ( posedge clock or  posedge aclr)
139 | 		if (aclr == 1'b1) dataa_exp_not_zero_ff_p1 <= 1'b0;
140 | 		else if  (clk_en == 1'b1)   dataa_exp_not_zero_ff_p1 <= dataa_exp_not_zero[7];
141 | 	// synopsys translate_off
142 | 	initial
143 | 		dataa_man_not_zero_ff_p1 = 0;
144 | 	// synopsys translate_on
145 | 	always @ ( posedge clock or  posedge aclr)
146 | 		if (aclr == 1'b1) dataa_man_not_zero_ff_p1 <= 1'b0;
147 | 		else if  (clk_en == 1'b1)   dataa_man_not_zero_ff_p1 <= dataa_man_not_zero[10];
148 | 	// synopsys translate_off
149 | 	initial
150 | 		dataa_man_not_zero_ff_p2 = 0;
151 | 	// synopsys translate_on
152 | 	always @ ( posedge clock or  posedge aclr)
153 | 		if (aclr == 1'b1) dataa_man_not_zero_ff_p2 <= 1'b0;
154 | 		else if  (clk_en == 1'b1)   dataa_man_not_zero_ff_p2 <= dataa_man_not_zero[22];
155 | 	// synopsys translate_off
156 | 	initial
157 | 		datab_exp_all_one_ff_p1 = 0;
158 | 	// synopsys translate_on
159 | 	always @ ( posedge clock or  posedge aclr)
160 | 		if (aclr == 1'b1) datab_exp_all_one_ff_p1 <= 1'b0;
161 | 		else if  (clk_en == 1'b1)   datab_exp_all_one_ff_p1 <= datab_exp_all_one[7];
162 | 	// synopsys translate_off
163 | 	initial
164 | 		datab_exp_not_zero_ff_p1 = 0;
165 | 	// synopsys translate_on
166 | 	always @ ( posedge clock or  posedge aclr)
167 | 		if (aclr == 1'b1) datab_exp_not_zero_ff_p1 <= 1'b0;
168 | 		else if  (clk_en == 1'b1)   datab_exp_not_zero_ff_p1 <= datab_exp_not_zero[7];
169 | 	// synopsys translate_off
170 | 	initial
171 | 		datab_man_not_zero_ff_p1 = 0;
172 | 	// synopsys translate_on
173 | 	always @ ( posedge clock or  posedge aclr)
174 | 		if (aclr == 1'b1) datab_man_not_zero_ff_p1 <= 1'b0;
175 | 		else if  (clk_en == 1'b1)   datab_man_not_zero_ff_p1 <= datab_man_not_zero[10];
176 | 	// synopsys translate_off
177 | 	initial
178 | 		datab_man_not_zero_ff_p2 = 0;
179 | 	// synopsys translate_on
180 | 	always @ ( posedge clock or  posedge aclr)
181 | 		if (aclr == 1'b1) datab_man_not_zero_ff_p2 <= 1'b0;
182 | 		else if  (clk_en == 1'b1)   datab_man_not_zero_ff_p2 <= datab_man_not_zero[22];
183 | 	// synopsys translate_off
184 | 	initial
185 | 		delay_exp2_bias = 0;
186 | 	// synopsys translate_on
187 | 	always @ ( posedge clock or  posedge aclr)
188 | 		if (aclr == 1'b1) delay_exp2_bias <= 10'b0;
189 | 		else if  (clk_en == 1'b1)   delay_exp2_bias <= delay_exp_bias;
190 | 	// synopsys translate_off
191 | 	initial
192 | 		delay_exp_bias = 0;
193 | 	// synopsys translate_on
194 | 	always @ ( posedge clock or  posedge aclr)
195 | 		if (aclr == 1'b1) delay_exp_bias <= 10'b0;
196 | 		else if  (clk_en == 1'b1)   delay_exp_bias <= wire_exp_bias_subtr_result;
197 | 	// synopsys translate_off
198 | 	initial
199 | 		delay_man_product_msb = 0;
200 | 	// synopsys translate_on
201 | 	always @ ( posedge clock or  posedge aclr)
202 | 		if (aclr == 1'b1) delay_man_product_msb <= 1'b0;
203 | 		else if  (clk_en == 1'b1)   delay_man_product_msb <= delay_man_product_msb_p0;
204 | 	// synopsys translate_off
205 | 	initial
206 | 		delay_man_product_msb_p0 = 0;
207 | 	// synopsys translate_on
208 | 	always @ ( posedge clock or  posedge aclr)
209 | 		if (aclr == 1'b1) delay_man_product_msb_p0 <= 1'b0;
210 | 		else if  (clk_en == 1'b1)   delay_man_product_msb_p0 <= wire_man_product2_mult_result[47];
211 | 	// synopsys translate_off
212 | 	initial
213 | 		exp_add_p1 = 0;
214 | 	// synopsys translate_on
215 | 	always @ ( posedge clock or  posedge aclr)
216 | 		if (aclr == 1'b1) exp_add_p1 <= 9'b0;
217 | 		else if  (clk_en == 1'b1)   exp_add_p1 <= wire_exp_add_adder_result;
218 | 	// synopsys translate_off
219 | 	initial
220 | 		exp_result_ff = 0;
221 | 	// synopsys translate_on
222 | 	always @ ( posedge clock or  posedge aclr)
223 | 		if (aclr == 1'b1) exp_result_ff <= 8'b0;
224 | 		else if  (clk_en == 1'b1)   exp_result_ff <= ((inf_num & {8{((exp_is_inf | input_is_infinity_ff1) | input_is_nan_ff1)}}) | ((wire_exp_adj_adder_result[7:0] & {8{(~ exp_is_zero)}}) & {8{input_not_zero_ff1}}));
225 | 	// synopsys translate_off
226 | 	initial
227 | 		input_is_infinity_dffe_0 = 0;
228 | 	// synopsys translate_on
229 | 	always @ ( posedge clock or  posedge aclr)
230 | 		if (aclr == 1'b1) input_is_infinity_dffe_0 <= 1'b0;
231 | 		else if  (clk_en == 1'b1)   input_is_infinity_dffe_0 <= ((dataa_exp_all_one_ff_p1 & (~ (dataa_man_not_zero_ff_p1 | dataa_man_not_zero_ff_p2))) | (datab_exp_all_one_ff_p1 & (~ (datab_man_not_zero_ff_p1 | datab_man_not_zero_ff_p2))));
232 | 	// synopsys translate_off
233 | 	initial
234 | 		input_is_infinity_dffe_1 = 0;
235 | 	// synopsys translate_on
236 | 	always @ ( posedge clock or  posedge aclr)
237 | 		if (aclr == 1'b1) input_is_infinity_dffe_1 <= 1'b0;
238 | 		else if  (clk_en == 1'b1)   input_is_infinity_dffe_1 <= input_is_infinity_dffe_0;
239 | 	// synopsys translate_off
240 | 	initial
241 | 		input_is_infinity_ff1 = 0;
242 | 	// synopsys translate_on
243 | 	always @ ( posedge clock or  posedge aclr)
244 | 		if (aclr == 1'b1) input_is_infinity_ff1 <= 1'b0;
245 | 		else if  (clk_en == 1'b1)   input_is_infinity_ff1 <= input_is_infinity_dffe_1;
246 | 	// synopsys translate_off
247 | 	initial
248 | 		input_is_nan_dffe_0 = 0;
249 | 	// synopsys translate_on
250 | 	always @ ( posedge clock or  posedge aclr)
251 | 		if (aclr == 1'b1) input_is_nan_dffe_0 <= 1'b0;
252 | 		else if  (clk_en == 1'b1)   input_is_nan_dffe_0 <= ((dataa_exp_all_one_ff_p1 & (dataa_man_not_zero_ff_p1 | dataa_man_not_zero_ff_p2)) | (datab_exp_all_one_ff_p1 & (datab_man_not_zero_ff_p1 | datab_man_not_zero_ff_p2)));
253 | 	// synopsys translate_off
254 | 	initial
255 | 		input_is_nan_dffe_1 = 0;
256 | 	// synopsys translate_on
257 | 	always @ ( posedge clock or  posedge aclr)
258 | 		if (aclr == 1'b1) input_is_nan_dffe_1 <= 1'b0;
259 | 		else if  (clk_en == 1'b1)   input_is_nan_dffe_1 <= input_is_nan_dffe_0;
260 | 	// synopsys translate_off
261 | 	initial
262 | 		input_is_nan_ff1 = 0;
263 | 	// synopsys translate_on
264 | 	always @ ( posedge clock or  posedge aclr)
265 | 		if (aclr == 1'b1) input_is_nan_ff1 <= 1'b0;
266 | 		else if  (clk_en == 1'b1)   input_is_nan_ff1 <= input_is_nan_dffe_1;
267 | 	// synopsys translate_off
268 | 	initial
269 | 		input_not_zero_dffe_0 = 0;
270 | 	// synopsys translate_on
271 | 	always @ ( posedge clock or  posedge aclr)
272 | 		if (aclr == 1'b1) input_not_zero_dffe_0 <= 1'b0;
273 | 		else if  (clk_en == 1'b1)   input_not_zero_dffe_0 <= (dataa_exp_not_zero_ff_p1 & datab_exp_not_zero_ff_p1);
274 | 	// synopsys translate_off
275 | 	initial
276 | 		input_not_zero_dffe_1 = 0;
277 | 	// synopsys translate_on
278 | 	always @ ( posedge clock or  posedge aclr)
279 | 		if (aclr == 1'b1) input_not_zero_dffe_1 <= 1'b0;
280 | 		else if  (clk_en == 1'b1)   input_not_zero_dffe_1 <= input_not_zero_dffe_0;
281 | 	// synopsys translate_off
282 | 	initial
283 | 		input_not_zero_ff1 = 0;
284 | 	// synopsys translate_on
285 | 	always @ ( posedge clock or  posedge aclr)
286 | 		if (aclr == 1'b1) input_not_zero_ff1 <= 1'b0;
287 | 		else if  (clk_en == 1'b1)   input_not_zero_ff1 <= input_not_zero_dffe_1;
288 | 	// synopsys translate_off
289 | 	initial
290 | 		lsb_dffe = 0;
291 | 	// synopsys translate_on
292 | 	always @ ( posedge clock or  posedge aclr)
293 | 		if (aclr == 1'b1) lsb_dffe <= 1'b0;
294 | 		else if  (clk_en == 1'b1)   lsb_dffe <= lsb_bit;
295 | 	// synopsys translate_off
296 | 	initial
297 | 		man_result_ff = 0;
298 | 	// synopsys translate_on
299 | 	always @ ( posedge clock or  posedge aclr)
300 | 		if (aclr == 1'b1) man_result_ff <= 23'b0;
301 | 		else if  (clk_en == 1'b1)   man_result_ff <= {((((((man_result_round[22] & input_not_zero_ff1) & (~ input_is_infinity_ff1)) & (~ exp_is_inf)) & (~ exp_is_zero)) | (input_is_infinity_ff1 & (~ input_not_zero_ff1))) | input_is_nan_ff1), (((((man_result_round[21:0] & {22{input_not_zero_ff1}}) & {22{(~ input_is_infinity_ff1)}}) & {22{(~ exp_is_inf)}}) & {22{(~ exp_is_zero)}}) & {22{(~ input_is_nan_ff1)}})};
302 | 	// synopsys translate_off
303 | 	initial
304 | 		man_round_p = 0;
305 | 	// synopsys translate_on
306 | 	always @ ( posedge clock or  posedge aclr)
307 | 		if (aclr == 1'b1) man_round_p <= 24'b0;
308 | 		else if  (clk_en == 1'b1)   man_round_p <= man_shift_full[24:1];
309 | 	// synopsys translate_off
310 | 	initial
311 | 		man_round_p2 = 0;
312 | 	// synopsys translate_on
313 | 	always @ ( posedge clock or  posedge aclr)
314 | 		if (aclr == 1'b1) man_round_p2 <= 25'b0;
315 | 		else if  (clk_en == 1'b1)   man_round_p2 <= wire_man_round_adder_result;
316 | 	// synopsys translate_off
317 | 	initial
318 | 		round_dffe = 0;
319 | 	// synopsys translate_on
320 | 	always @ ( posedge clock or  posedge aclr)
321 | 		if (aclr == 1'b1) round_dffe <= 1'b0;
322 | 		else if  (clk_en == 1'b1)   round_dffe <= round_bit;
323 | 	// synopsys translate_off
324 | 	initial
325 | 		sign_node_ff0 = 0;
326 | 	// synopsys translate_on
327 | 	always @ ( posedge clock or  posedge aclr)
328 | 		if (aclr == 1'b1) sign_node_ff0 <= 1'b0;
329 | 		else if  (clk_en == 1'b1)   sign_node_ff0 <= (dataa[31] ^ datab[31]);
330 | 	// synopsys translate_off
331 | 	initial
332 | 		sign_node_ff1 = 0;
333 | 	// synopsys translate_on
334 | 	always @ ( posedge clock or  posedge aclr)
335 | 		if (aclr == 1'b1) sign_node_ff1 <= 1'b0;
336 | 		else if  (clk_en == 1'b1)   sign_node_ff1 <= sign_node_ff0[0:0];
337 | 	// synopsys translate_off
338 | 	initial
339 | 		sign_node_ff2 = 0;
340 | 	// synopsys translate_on
341 | 	always @ ( posedge clock or  posedge aclr)
342 | 		if (aclr == 1'b1) sign_node_ff2 <= 1'b0;
343 | 		else if  (clk_en == 1'b1)   sign_node_ff2 <= sign_node_ff1[0:0];
344 | 	// synopsys translate_off
345 | 	initial
346 | 		sign_node_ff3 = 0;
347 | 	// synopsys translate_on
348 | 	always @ ( posedge clock or  posedge aclr)
349 | 		if (aclr == 1'b1) sign_node_ff3 <= 1'b0;
350 | 		else if  (clk_en == 1'b1)   sign_node_ff3 <= sign_node_ff2[0:0];
351 | 	// synopsys translate_off
352 | 	initial
353 | 		sign_node_ff4 = 0;
354 | 	// synopsys translate_on
355 | 	always @ ( posedge clock or  posedge aclr)
356 | 		if (aclr == 1'b1) sign_node_ff4 <= 1'b0;
357 | 		else if  (clk_en == 1'b1)   sign_node_ff4 <= sign_node_ff3[0:0];
358 | 	// synopsys translate_off
359 | 	initial
360 | 		sticky_dffe = 0;
361 | 	// synopsys translate_on
362 | 	always @ ( posedge clock or  posedge aclr)
363 | 		if (aclr == 1'b1) sticky_dffe <= 1'b0;
364 | 		else if  (clk_en == 1'b1)   sticky_dffe <= sticky_bit[22];
365 | 	lpm_add_sub   exp_add_adder
366 | 	( 
367 | 	.aclr(aclr),
368 | 	.cin(1'b0),
369 | 	.clken(clk_en),
370 | 	.clock(clock),
371 | 	.cout(),
372 | 	.dataa({1'b0, dataa[30:23]}),
373 | 	.datab({1'b0, datab[30:23]}),
374 | 	.overflow(),
375 | 	.result(wire_exp_add_adder_result)
376 | 	`ifndef FORMAL_VERIFICATION
377 | 	// synopsys translate_off
378 | 	`endif
379 | 	,
380 | 	.add_sub(1'b1)
381 | 	`ifndef FORMAL_VERIFICATION
382 | 	// synopsys translate_on
383 | 	`endif
384 | 	);
385 | 	defparam
386 | 		exp_add_adder.lpm_pipeline = 1,
387 | 		exp_add_adder.lpm_width = 9,
388 | 		exp_add_adder.lpm_type = "lpm_add_sub";
389 | 	lpm_add_sub   exp_adj_adder
390 | 	( 
391 | 	.cin(1'b0),
392 | 	.cout(),
393 | 	.dataa(delay_exp2_bias),
394 | 	.datab(expmod),
395 | 	.overflow(),
396 | 	.result(wire_exp_adj_adder_result)
397 | 	`ifndef FORMAL_VERIFICATION
398 | 	// synopsys translate_off
399 | 	`endif
400 | 	,
401 | 	.aclr(1'b0),
402 | 	.add_sub(1'b1),
403 | 	.clken(1'b1),
404 | 	.clock(1'b0)
405 | 	`ifndef FORMAL_VERIFICATION
406 | 	// synopsys translate_on
407 | 	`endif
408 | 	);
409 | 	defparam
410 | 		exp_adj_adder.lpm_width = 10,
411 | 		exp_adj_adder.lpm_type = "lpm_add_sub";
412 | 	lpm_add_sub   exp_bias_subtr
413 | 	( 
414 | 	.cout(),
415 | 	.dataa({1'b0, exp_add_p1[8:0]}),
416 | 	.datab({bias[9:0]}),
417 | 	.overflow(),
418 | 	.result(wire_exp_bias_subtr_result)
419 | 	`ifndef FORMAL_VERIFICATION
420 | 	// synopsys translate_off
421 | 	`endif
422 | 	,
423 | 	.aclr(1'b0),
424 | 	.add_sub(1'b1),
425 | 	.cin(),
426 | 	.clken(1'b1),
427 | 	.clock(1'b0)
428 | 	`ifndef FORMAL_VERIFICATION
429 | 	// synopsys translate_on
430 | 	`endif
431 | 	);
432 | 	defparam
433 | 		exp_bias_subtr.lpm_direction = "SUB",
434 | 		exp_bias_subtr.lpm_pipeline = 0,
435 | 		exp_bias_subtr.lpm_representation = "UNSIGNED",
436 | 		exp_bias_subtr.lpm_width = 10,
437 | 		exp_bias_subtr.lpm_type = "lpm_add_sub";
438 | 	lpm_add_sub   man_round_adder
439 | 	( 
440 | 	.cout(),
441 | 	.dataa({1'b0, man_round_p}),
442 | 	.datab({{24{1'b0}}, round_carry}),
443 | 	.overflow(),
444 | 	.result(wire_man_round_adder_result)
445 | 	`ifndef FORMAL_VERIFICATION
446 | 	// synopsys translate_off
447 | 	`endif
448 | 	,
449 | 	.aclr(1'b0),
450 | 	.add_sub(1'b1),
451 | 	.cin(),
452 | 	.clken(1'b1),
453 | 	.clock(1'b0)
454 | 	`ifndef FORMAL_VERIFICATION
455 | 	// synopsys translate_on
456 | 	`endif
457 | 	);
458 | 	defparam
459 | 		man_round_adder.lpm_pipeline = 0,
460 | 		man_round_adder.lpm_width = 25,
461 | 		man_round_adder.lpm_type = "lpm_add_sub";
462 | 	lpm_mult   man_product2_mult
463 | 	( 
464 | 	.aclr(aclr),
465 | 	.clken(clk_en),
466 | 	.clock(clock),
467 | 	.dataa({1'b1, dataa[22:0]}),
468 | 	.datab({1'b1, datab[22:0]}),
469 | 	.result(wire_man_product2_mult_result)
470 | 	`ifndef FORMAL_VERIFICATION
471 | 	// synopsys translate_off
472 | 	`endif
473 | 	,
474 | 	.sum({1{1'b0}})
475 | 	`ifndef FORMAL_VERIFICATION
476 | 	// synopsys translate_on
477 | 	`endif
478 | 	);
479 | 	defparam
480 | 		man_product2_mult.lpm_pipeline = 2,
481 | 		man_product2_mult.lpm_representation = "UNSIGNED",
482 | 		man_product2_mult.lpm_widtha = 24,
483 | 		man_product2_mult.lpm_widthb = 24,
484 | 		man_product2_mult.lpm_widthp = 48,
485 | 		man_product2_mult.lpm_widths = 1,
486 | 		man_product2_mult.lpm_type = "lpm_mult",
487 | 		man_product2_mult.lpm_hint = "DEDICATED_MULTIPLIER_CIRCUITRY=YES";
488 | 	assign
489 | 		aclr = 1'b0,
490 | 		bias = {{3{1'b0}}, {7{1'b1}}},
491 | 		dataa_exp_all_one = {(dataa[30] & dataa_exp_all_one[6]), (dataa[29] & dataa_exp_all_one[5]), (dataa[28] & dataa_exp_all_one[4]), (dataa[27] & dataa_exp_all_one[3]), (dataa[26] & dataa_exp_all_one[2]), (dataa[25] & dataa_exp_all_one[1]), (dataa[24] & dataa_exp_all_one[0]), dataa[23]},
492 | 		dataa_exp_not_zero = {(dataa[30] | dataa_exp_not_zero[6]), (dataa[29] | dataa_exp_not_zero[5]), (dataa[28] | dataa_exp_not_zero[4]), (dataa[27] | dataa_exp_not_zero[3]), (dataa[26] | dataa_exp_not_zero[2]), (dataa[25] | dataa_exp_not_zero[1]), (dataa[24] | dataa_exp_not_zero[0]), dataa[23]},
493 | 		dataa_man_not_zero = {(dataa[22] | dataa_man_not_zero[21]), (dataa[21] | dataa_man_not_zero[20]), (dataa[20] | dataa_man_not_zero[19]), (dataa[19] | dataa_man_not_zero[18]), (dataa[18] | dataa_man_not_zero[17]), (dataa[17] | dataa_man_not_zero[16]), (dataa[16] | dataa_man_not_zero[15]), (dataa[15] | dataa_man_not_zero[14]), (dataa[14] | dataa_man_not_zero[13]), (dataa[13] | dataa_man_not_zero[12]), (dataa[12] | dataa_man_not_zero[11]), dataa[11], (dataa[10] | dataa_man_not_zero[9]), (dataa[9] | dataa_man_not_zero[8]), (dataa[8] | dataa_man_not_zero[7]), (dataa[7] | dataa_man_not_zero[6]), (dataa[6] | dataa_man_not_zero[5]), (dataa[5] | dataa_man_not_zero[4]), (dataa[4] | dataa_man_not_zero[3]), (dataa[3] | dataa_man_not_zero[2]), (dataa[2] | dataa_man_not_zero[1]), (dataa[1] | dataa_man_not_zero[0]), dataa[0]},
494 | 		datab_exp_all_one = {(datab[30] & datab_exp_all_one[6]), (datab[29] & datab_exp_all_one[5]), (datab[28] & datab_exp_all_one[4]), (datab[27] & datab_exp_all_one[3]), (datab[26] & datab_exp_all_one[2]), (datab[25] & datab_exp_all_one[1]), (datab[24] & datab_exp_all_one[0]), datab[23]},
495 | 		datab_exp_not_zero = {(datab[30] | datab_exp_not_zero[6]), (datab[29] | datab_exp_not_zero[5]), (datab[28] | datab_exp_not_zero[4]), (datab[27] | datab_exp_not_zero[3]), (datab[26] | datab_exp_not_zero[2]), (datab[25] | datab_exp_not_zero[1]), (datab[24] | datab_exp_not_zero[0]), datab[23]},
496 | 		datab_man_not_zero = {(datab[22] | datab_man_not_zero[21]), (datab[21] | datab_man_not_zero[20]), (datab[20] | datab_man_not_zero[19]), (datab[19] | datab_man_not_zero[18]), (datab[18] | datab_man_not_zero[17]), (datab[17] | datab_man_not_zero[16]), (datab[16] | datab_man_not_zero[15]), (datab[15] | datab_man_not_zero[14]), (datab[14] | datab_man_not_zero[13]), (datab[13] | datab_man_not_zero[12]), (datab[12] | datab_man_not_zero[11]), datab[11], (datab[10] | datab_man_not_zero[9]), (datab[9] | datab_man_not_zero[8]), (datab[8] | datab_man_not_zero[7]), (datab[7] | datab_man_not_zero[6]), (datab[6] | datab_man_not_zero[5]), (datab[5] | datab_man_not_zero[4]), (datab[4] | datab_man_not_zero[3]), (datab[3] | datab_man_not_zero[2]), (datab[2] | datab_man_not_zero[1]), (datab[1] | datab_man_not_zero[0]), datab[0]},
497 | 		exp_is_inf = (((~ wire_exp_adj_adder_result[9]) & wire_exp_adj_adder_result[8]) | ((~ wire_exp_adj_adder_result[8]) & result_exp_all_one[7])),
498 | 		exp_is_zero = (wire_exp_adj_adder_result[9] | (~ result_exp_not_zero[8])),
499 | 		expmod = {{8{1'b0}}, (delay_man_product_msb & man_round_p2[24]), (delay_man_product_msb ^ man_round_p2[24])},
500 | 		inf_num = {8{1'b1}},
501 | 		lsb_bit = man_shift_full[1],
502 | 		man_result_round = ((man_round_p2[23:0] & {24{(~ man_round_p2[24])}}) | (man_round_p2[24:1] & {24{man_round_p2[24]}})),
503 | 		man_shift_full = ((wire_man_product2_mult_result[46:22] & {25{(~ wire_man_product2_mult_result[47])}}) | (wire_man_product2_mult_result[47:23] & {25{wire_man_product2_mult_result[47]}})),
504 | 		result = {sign_node_ff4[0:0], exp_result_ff[7:0], man_result_ff[22:0]},
505 | 		result_exp_all_one = {(result_exp_all_one[6] & wire_exp_adj_adder_result[7]), (result_exp_all_one[5] & wire_exp_adj_adder_result[6]), (result_exp_all_one[4] & wire_exp_adj_adder_result[5]), (result_exp_all_one[3] & wire_exp_adj_adder_result[4]), (result_exp_all_one[2] & wire_exp_adj_adder_result[3]), (result_exp_all_one[1] & wire_exp_adj_adder_result[2]), (result_exp_all_one[0] & wire_exp_adj_adder_result[1]), wire_exp_adj_adder_result[0]},
506 | 		result_exp_not_zero = {(result_exp_not_zero[7] | wire_exp_adj_adder_result[8]), (result_exp_not_zero[6] | wire_exp_adj_adder_result[7]), (result_exp_not_zero[5] | wire_exp_adj_adder_result[6]), (result_exp_not_zero[4] | wire_exp_adj_adder_result[5]), (result_exp_not_zero[3] | wire_exp_adj_adder_result[4]), (result_exp_not_zero[2] | wire_exp_adj_adder_result[3]), (result_exp_not_zero[1] | wire_exp_adj_adder_result[2]), (result_exp_not_zero[0] | wire_exp_adj_adder_result[1]), wire_exp_adj_adder_result[0]},
507 | 		round_bit = man_shift_full[0],
508 | 		round_carry = (round_dffe & (lsb_dffe | sticky_dffe)),
509 | 		sticky_bit = {(sticky_bit[21] | (wire_man_product2_mult_result[47] & wire_man_product2_mult_result[22])), (sticky_bit[20] | wire_man_product2_mult_result[21]), (sticky_bit[19] | wire_man_product2_mult_result[20]), (sticky_bit[18] | wire_man_product2_mult_result[19]), (sticky_bit[17] | wire_man_product2_mult_result[18]), (sticky_bit[16] | wire_man_product2_mult_result[17]), (sticky_bit[15] | wire_man_product2_mult_result[16]), (sticky_bit[14] | wire_man_product2_mult_result[15]), (sticky_bit[13] | wire_man_product2_mult_result[14]), (sticky_bit[12] | wire_man_product2_mult_result[13]), (sticky_bit[11] | wire_man_product2_mult_result[12]), (sticky_bit[10] | wire_man_product2_mult_result[11]), (sticky_bit[9] | wire_man_product2_mult_result[10]), (sticky_bit[8] | wire_man_product2_mult_result[9]), (sticky_bit[7] | wire_man_product2_mult_result[8]), (sticky_bit[6] | wire_man_product2_mult_result[7]), (sticky_bit[5] | wire_man_product2_mult_result[6]), (sticky_bit[4] | wire_man_product2_mult_result[5]), (sticky_bit[3] | wire_man_product2_mult_result[4]), (sticky_bit[2] | wire_man_product2_mult_result[3]), (sticky_bit[1] | wire_man_product2_mult_result[2]), (sticky_bit[0] | wire_man_product2_mult_result[1]), wire_man_product2_mult_result[0]};
510 | endmodule //float_mult_altfp_mult_t9o
511 | //VALID FILE
512 | 
513 | // float_mult: thin wrapper around the generated core float_mult_altfp_mult_t9o; the retrieval info at the end of this file records FPM_FORMAT "Single", PIPELINE 5, ROUNDING "TO_NEAREST" and DENORMAL_SUPPORT "NO".
514 | // synopsys translate_off
515 | `timescale 1 ps / 1 ps
516 | // synopsys translate_on
517 | module float_mult (
518 | 	clk_en,	// clock enable, forwarded unchanged to the core
519 | 	clock,	// clock, forwarded unchanged to the core
520 | 	dataa,	// first 32-bit operand
521 | 	datab,	// second 32-bit operand
522 | 	result);	// 32-bit output taken from the core's result port
523 | 	// Port directions and widths (non-ANSI declarations matching the list above).
524 | 	input	  clk_en;
525 | 	input	  clock;
526 | 	input	[31:0]  dataa;
527 | 	input	[31:0]  datab;
528 | 	output	[31:0]  result;
529 | 
530 | 	wire [31:0] sub_wire0;	// net driven by the core's result port
531 | 	wire [31:0] result = sub_wire0[31:0];	// net declaration assignment: result continuously follows sub_wire0
532 | 	// Single instance of the generated core; every port is connected by name and no other logic is added here.
533 | 	float_mult_altfp_mult_t9o	float_mult_altfp_mult_t9o_component (
534 | 				.clk_en (clk_en),
535 | 				.clock (clock),
536 | 				.dataa (dataa),
537 | 				.datab (datab),
538 | 				.result (sub_wire0));
539 | 
540 | endmodule
541 | 
542 | // ============================================================
543 | // CNX file retrieval info
544 | // ============================================================
545 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
546 | // Retrieval info: PRIVATE: FPM_FORMAT STRING "Single"
547 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
548 | // Retrieval info: CONSTANT: DEDICATED_MULTIPLIER_CIRCUITRY STRING "YES"
549 | // Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
550 | // Retrieval info: CONSTANT: EXCEPTION_HANDLING STRING "NO"
551 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "UNUSED"
552 | // Retrieval info: CONSTANT: LPM_HINT STRING "UNUSED"
553 | // Retrieval info: CONSTANT: LPM_TYPE STRING "altfp_mult"
554 | // Retrieval info: CONSTANT: PIPELINE NUMERIC "5"
555 | // Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
556 | // Retrieval info: CONSTANT: ROUNDING STRING "TO_NEAREST"
557 | // Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
558 | // Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
559 | // Retrieval info: USED_PORT: clk_en 0 0 0 0 INPUT NODEFVAL "clk_en"
560 | // Retrieval info: CONNECT: @clk_en 0 0 0 0 clk_en 0 0 0 0
561 | // Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
562 | // Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
563 | // Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
564 | // Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
565 | // Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
566 | // Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
567 | // Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
568 | // Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
569 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.v TRUE FALSE
570 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.qip TRUE FALSE
571 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.bsf TRUE TRUE
572 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_inst.v TRUE TRUE
573 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_bb.v TRUE TRUE
574 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.inc TRUE TRUE
575 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.cmp TRUE TRUE
576 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX NUMERIC "1"
577 | // Retrieval info: LIB_FILE: lpm
578 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult_bb.v:
--------------------------------------------------------------------------------
 1 | // megafunction wizard: %ALTFP_MULT%VBB%
 2 | // GENERATION: STANDARD
 3 | // VERSION: WM1.0
 4 | // MODULE: ALTFP_MULT 
 5 | 
 6 | // ============================================================
 7 | // File Name: float_mult.v
 8 | // Megafunction Name(s):
 9 | // 			ALTFP_MULT
10 | //
11 | // Simulation Library Files(s):
12 | // 			lpm
13 | // ============================================================
14 | // ************************************************************
15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
16 | //
17 | // 15.1.1 Build 189 12/02/2015 SJ Standard Edition
18 | // ************************************************************
19 | 
20 | //Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
21 | //Your use of Altera Corporation's design tools, logic functions 
22 | //and other software and tools, and its AMPP partner logic 
23 | //functions, and any output files from any of the foregoing 
24 | //(including device programming or simulation files), and any 
25 | //associated documentation or information are expressly subject 
26 | //to the terms and conditions of the Altera Program License 
27 | //Subscription Agreement, the Altera Quartus Prime License Agreement,
28 | //the Altera MegaCore Function License Agreement, or other 
29 | //applicable license agreement, including, without limitation, 
30 | //that your use is for the sole purpose of programming logic 
31 | //devices manufactured by Altera and sold by Altera or its 
32 | //authorized distributors.  Please refer to the applicable 
33 | //agreement for further details.
34 | 
35 | module float_mult (
36 | 	clk_en,	// clock enable input
37 | 	clock,	// clock input
38 | 	dataa,	// first 32-bit operand
39 | 	datab,	// second 32-bit operand
40 | 	result)/* synthesis synthesis_clearbox = 1 */;
41 | 	// Black-box declaration of float_mult: only the port list and directions are given, the body contains no logic.
42 | 	input	  clk_en;
43 | 	input	  clock;
44 | 	input	[31:0]  dataa;
45 | 	input	[31:0]  datab;
46 | 	output	[31:0]  result;	// 32-bit result; left undriven in this stub
47 | 
48 | endmodule
49 | 
50 | // ============================================================
51 | // CNX file retrieval info
52 | // ============================================================
53 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
54 | // Retrieval info: PRIVATE: FPM_FORMAT STRING "Single"
55 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
56 | // Retrieval info: CONSTANT: DEDICATED_MULTIPLIER_CIRCUITRY STRING "YES"
57 | // Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
58 | // Retrieval info: CONSTANT: EXCEPTION_HANDLING STRING "NO"
59 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "UNUSED"
60 | // Retrieval info: CONSTANT: LPM_HINT STRING "UNUSED"
61 | // Retrieval info: CONSTANT: LPM_TYPE STRING "altfp_mult"
62 | // Retrieval info: CONSTANT: PIPELINE NUMERIC "5"
63 | // Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
64 | // Retrieval info: CONSTANT: ROUNDING STRING "TO_NEAREST"
65 | // Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
66 | // Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
67 | // Retrieval info: USED_PORT: clk_en 0 0 0 0 INPUT NODEFVAL "clk_en"
68 | // Retrieval info: CONNECT: @clk_en 0 0 0 0 clk_en 0 0 0 0
69 | // Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
70 | // Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
71 | // Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
72 | // Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
73 | // Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
74 | // Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
75 | // Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
76 | // Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
77 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.v TRUE FALSE
78 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.qip TRUE FALSE
79 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.bsf TRUE TRUE
80 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_inst.v TRUE TRUE
81 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_bb.v TRUE TRUE
82 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.inc TRUE TRUE
83 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.cmp TRUE TRUE
84 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX NUMERIC "1"
85 | // Retrieval info: LIB_FILE: lpm
86 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult_inst.v:
--------------------------------------------------------------------------------
1 | float_mult	float_mult_inst (	// instantiation template for float_mult; the *_sig nets are not declared in this file and must be supplied by the enclosing module
2 | 	.clk_en ( clk_en_sig ),	// clock enable
3 | 	.clock ( clock_sig ),	// clock
4 | 	.dataa ( dataa_sig ),	// first operand (float_mult declares this port as [31:0])
5 | 	.datab ( datab_sig ),	// second operand (float_mult declares this port as [31:0])
6 | 	.result ( result_sig )	// product output (float_mult declares this port as [31:0])
7 | 	);
8 | 


--------------------------------------------------------------------------------
/rtl/qip/float_mult_syn.v:
--------------------------------------------------------------------------------
  1 | // megafunction wizard: %ALTFP_MULT%
  2 | // GENERATION: STANDARD
  3 | // VERSION: WM1.0
  4 | // MODULE: ALTFP_MULT 
  5 | 
  6 | // ============================================================
  7 | // File Name: float_mult.v
  8 | // Megafunction Name(s):
  9 | // 			ALTFP_MULT
 10 | //
 11 | // Simulation Library Files(s):
 12 | // 			lpm
 13 | // ============================================================
 14 | // ************************************************************
 15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
 16 | //
 17 | // 15.1.1 Build 189 12/02/2015 SJ Standard Edition
 18 | // ************************************************************
 19 | 
 20 | 
 21 | //Copyright (C) 1991-2015 Altera Corporation. All rights reserved.
 22 | //Your use of Altera Corporation's design tools, logic functions 
 23 | //and other software and tools, and its AMPP partner logic 
 24 | //functions, and any output files from any of the foregoing 
 25 | //(including device programming or simulation files), and any 
 26 | //associated documentation or information are expressly subject 
 27 | //to the terms and conditions of the Altera Program License 
 28 | //Subscription Agreement, the Altera Quartus Prime License Agreement,
 29 | //the Altera MegaCore Function License Agreement, or other 
 30 | //applicable license agreement, including, without limitation, 
 31 | //that your use is for the sole purpose of programming logic 
 32 | //devices manufactured by Altera and sold by Altera or its 
 33 | //authorized distributors.  Please refer to the applicable 
 34 | //agreement for further details.
 35 | 
 36 | 
 37 | //altfp_mult DEDICATED_MULTIPLIER_CIRCUITRY="YES" DENORMAL_SUPPORT="NO" DEVICE_FAMILY="Stratix V" EXCEPTION_HANDLING="NO" PIPELINE=5 REDUCED_FUNCTIONALITY="NO" ROUNDING="TO_NEAREST" WIDTH_EXP=8 WIDTH_MAN=23 clk_en clock dataa datab result
 38 | //VERSION_BEGIN 15.1 cbx_alt_ded_mult_y 2015:11:24:18:49:55:SJ cbx_altbarrel_shift 2015:11:24:18:49:55:SJ cbx_altera_mult_add 2015:11:24:18:49:55:SJ cbx_altera_mult_add_rtl 2015:11:24:18:49:55:SJ cbx_altfp_mult 2015:11:24:18:49:55:SJ cbx_altmult_add 2015:11:24:18:49:55:SJ cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_lpm_compare 2015:11:24:18:49:55:SJ cbx_lpm_mult 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_padd 2015:11:24:18:49:55:SJ cbx_parallel_add 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ cbx_util_mgl 2015:11:24:18:49:55:SJ  VERSION_END
 39 | // synthesis VERILOG_INPUT_VERSION VERILOG_2001
 40 | // altera message_off 10463
 41 | 
 42 | 
 43 | 
 44 | //lpm_add_sub DEVICE_FAMILY="Stratix V" LPM_PIPELINE=1 LPM_WIDTH=9 aclr cin clken clock dataa datab result
 45 | //VERSION_BEGIN 15.1 cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ  VERSION_END
 46 | 
 47 | 
 48 | //lpm_add_sub DEVICE_FAMILY="Stratix V" LPM_WIDTH=10 cin dataa datab result
 49 | //VERSION_BEGIN 15.1 cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ  VERSION_END
 50 | 
 51 | 
 52 | //lpm_add_sub DEVICE_FAMILY="Stratix V" LPM_DIRECTION="SUB" LPM_PIPELINE=0 LPM_REPRESENTATION="UNSIGNED" LPM_WIDTH=10 dataa datab result
 53 | //VERSION_BEGIN 15.1 cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ  VERSION_END
 54 | 
 55 | 
 56 | //lpm_add_sub DEVICE_FAMILY="Stratix V" LPM_PIPELINE=0 LPM_WIDTH=25 dataa datab result
 57 | //VERSION_BEGIN 15.1 cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ  VERSION_END
 58 | 
 59 | 
 60 | //lpm_mult DEDICATED_MULTIPLIER_CIRCUITRY="YES" DEVICE_FAMILY="Stratix V" LPM_PIPELINE=2 LPM_REPRESENTATION="UNSIGNED" LPM_WIDTHA=24 LPM_WIDTHB=24 LPM_WIDTHP=48 LPM_WIDTHS=1 aclr clken clock dataa datab result
 61 | //VERSION_BEGIN 15.1 cbx_cycloneii 2015:11:24:18:49:55:SJ cbx_lpm_add_sub 2015:11:24:18:49:55:SJ cbx_lpm_mult 2015:11:24:18:49:55:SJ cbx_mgl 2015:11:24:20:43:33:SJ cbx_nadder 2015:11:24:18:49:55:SJ cbx_padd 2015:11:24:18:49:55:SJ cbx_stratix 2015:11:24:18:49:55:SJ cbx_stratixii 2015:11:24:18:49:55:SJ cbx_util_mgl 2015:11:24:18:49:55:SJ  VERSION_END
 62 | 
 63 | //synthesis_resources = 
 64 | //synopsys translate_off
 65 | `timescale 1 ps / 1 ps
 66 | //synopsys translate_on
 67 | module  float_mult_mult	// 24 x 24 -> 48-bit unsigned multiplier with registered operands and a registered product
 68 | 	( 
 69 | 	aclr,	// asynchronous clear, active high: zeroes all three registers
 70 | 	clken,	// clock enable: the registers only load when this is 1
 71 | 	clock,	// all registers load on the rising edge
 72 | 	dataa,	// first 24-bit operand
 73 | 	datab,	// second 24-bit operand
 74 | 	result) /* synthesis synthesis_clearbox=1 */;
 75 | 	input   aclr;
 76 | 	input   clken;
 77 | 	input   clock;
 78 | 	input   [23:0]  dataa;
 79 | 	input   [23:0]  datab;
 80 | 	output   [47:0]  result;
 81 | `ifndef ALTERA_RESERVED_QIS
 82 | // synopsys translate_off
 83 | `endif
 84 | 	tri0   aclr;	// tri0 net: reads as 0 when nothing drives it
 85 | 	tri1   clken;	// tri1 net: reads as 1 when nothing drives it
 86 | 	tri0   clock;	// tri0 net: reads as 0 when nothing drives it
 87 | `ifndef ALTERA_RESERVED_QIS
 88 | // synopsys translate_on
 89 | `endif
 90 | 	// Pipeline storage: one register per operand plus one for the product, so the result lags the inputs by two enabled clock edges.
 91 | 	reg  [23:0]  dataa_input_reg;
 92 | 	reg  [23:0]  datab_input_reg;
 93 | 	reg  [47:0]  result_output_reg;
 94 | 	wire [23:0]    dataa_wire;
 95 | 	wire [23:0]    datab_wire;
 96 | 	wire [47:0]    result_wire;
 97 | 	// Every register below follows the same pattern: an initial value of 0 wrapped in translate_off/on pragmas, asynchronous clear on aclr, load gated by clken.
 98 | 
 99 | 	// synopsys translate_off
100 | 	initial
101 | 		dataa_input_reg = 0;
102 | 	// synopsys translate_on
103 | 	always @(posedge clock or posedge aclr)
104 | 		if (aclr == 1'b1)    dataa_input_reg <= 24'b0;
105 | 		else if (clken == 1'b1)	dataa_input_reg <= dataa;
106 | 	// synopsys translate_off
107 | 	initial
108 | 		datab_input_reg = 0;
109 | 	// synopsys translate_on
110 | 	always @(posedge clock or posedge aclr)
111 | 		if (aclr == 1'b1)    datab_input_reg <= 24'b0;
112 | 		else if (clken == 1'b1)	datab_input_reg <= datab;
113 | 	// synopsys translate_off
114 | 	initial
115 | 		result_output_reg = 0;
116 | 	// synopsys translate_on
117 | 	always @(posedge clock or posedge aclr)
118 | 		if (aclr == 1'b1)    result_output_reg <= 48'b0;
119 | 		else if (clken == 1'b1)	result_output_reg <= result_wire[47:0];
120 | 	// Combinational path between the input registers and the output register.
121 | 	assign dataa_wire = dataa_input_reg;
122 | 	assign datab_wire = datab_input_reg;
123 | 	assign result_wire = dataa_wire * datab_wire;	// product of the two registered operands
124 | 	assign result = ({result_output_reg});	// module output is the registered product
125 | 
126 | endmodule //float_mult_mult
127 | 
128 | //synthesis_resources = lut 55 reg 136 
129 | //synopsys translate_off
130 | `timescale 1 ps / 1 ps
131 | //synopsys translate_on
132 | module  float_mult_altfp_mult
133 | 	( 
134 | 	clk_en,
135 | 	clock,
136 | 	dataa,
137 | 	datab,
138 | 	result) /* synthesis synthesis_clearbox=1 */;
139 | 	input   clk_en;
140 | 	input   clock;
141 | 	input   [31:0]  dataa;
142 | 	input   [31:0]  datab;
143 | 	output   [31:0]  result;
144 | `ifndef ALTERA_RESERVED_QIS
145 | // synopsys translate_off
146 | `endif
147 | 	tri1   clk_en;
148 | `ifndef ALTERA_RESERVED_QIS
149 | // synopsys translate_on
150 | `endif
151 | 
152 | 	reg	dataa_exp_all_one_ff_p1;
153 | 	reg	dataa_exp_not_zero_ff_p1;
154 | 	reg	dataa_man_not_zero_ff_p1;
155 | 	reg	dataa_man_not_zero_ff_p2;
156 | 	reg	datab_exp_all_one_ff_p1;
157 | 	reg	datab_exp_not_zero_ff_p1;
158 | 	reg	datab_man_not_zero_ff_p1;
159 | 	reg	datab_man_not_zero_ff_p2;
160 | 	reg	[9:0]	delay_exp2_bias;
161 | 	reg	[9:0]	delay_exp_bias;
162 | 	reg	delay_man_product_msb;
163 | 	reg	delay_man_product_msb_p0;
164 | 	reg	[8:0]	exp_add_p1;
165 | 	reg	[7:0]	exp_result_ff;
166 | 	reg	input_is_infinity_dffe_0;
167 | 	reg	input_is_infinity_dffe_1;
168 | 	reg	input_is_infinity_ff1;
169 | 	reg	input_is_nan_dffe_0;
170 | 	reg	input_is_nan_dffe_1;
171 | 	reg	input_is_nan_ff1;
172 | 	reg	input_not_zero_dffe_0;
173 | 	reg	input_not_zero_dffe_1;
174 | 	reg	input_not_zero_ff1;
175 | 	reg	lsb_dffe;
176 | 	reg	[22:0]	man_result_ff;
177 | 	reg	[23:0]	man_round_p;
178 | 	reg	[24:0]	man_round_p2;
179 | 	reg	round_dffe;
180 | 	reg	[0:0]	sign_node_ff0;
181 | 	reg	[0:0]	sign_node_ff1;
182 | 	reg	[0:0]	sign_node_ff2;
183 | 	reg	[0:0]	sign_node_ff3;
184 | 	reg	[0:0]	sign_node_ff4;
185 | 	reg	sticky_dffe;
186 | 	(* ALTERA_ATTRIBUTE = {"POWER_UP_LEVEL=LOW"} *)
187 | 	reg	[8:0]	wire_exp_add_adder_pipeline_dffe_Q;
188 | 	wire	[8:0]	wire_exp_add_adder_pipeline_dffe_D;
189 | 	wire	[9:0]	wire_exp_add_adder_result_int;
190 | 	wire	wire_exp_add_adder_aclr;
191 | 	wire	wire_exp_add_adder_cin;
192 | 	wire	wire_exp_add_adder_clken;
193 | 	wire	wire_exp_add_adder_clock;
194 | 	wire	[8:0]	wire_exp_add_adder_dataa;
195 | 	wire	[8:0]	wire_exp_add_adder_datab;
196 | 	wire	[8:0]	wire_exp_add_adder_result;
197 | 	wire	[10:0]	wire_exp_adj_adder_result_int;
198 | 	wire	wire_exp_adj_adder_cin;
199 | 	wire	[9:0]	wire_exp_adj_adder_dataa;
200 | 	wire	[9:0]	wire_exp_adj_adder_datab;
201 | 	wire	[9:0]	wire_exp_adj_adder_result;
202 | 	wire	[9:0]	wire_exp_bias_subtr_dataa;
203 | 	wire	[9:0]	wire_exp_bias_subtr_datab;
204 | 	wire	[9:0]	wire_exp_bias_subtr_result;
205 | 	wire	[24:0]	wire_man_round_adder_dataa;
206 | 	wire	[24:0]	wire_man_round_adder_datab;
207 | 	wire	[24:0]	wire_man_round_adder_result;
208 | 	wire  [23:0]   wire_man_product2_mult_dataa;
209 | 	wire  [23:0]   wire_man_product2_mult_datab;
210 | 	wire  [47:0]   wire_man_product2_mult_result;
211 | 	wire aclr;
212 | 	wire  [9:0]  bias;
213 | 	wire  [7:0]  dataa_exp_all_one;
214 | 	wire  [7:0]  dataa_exp_not_zero;
215 | 	wire  [22:0]  dataa_man_not_zero;
216 | 	wire  [7:0]  datab_exp_all_one;
217 | 	wire  [7:0]  datab_exp_not_zero;
218 | 	wire  [22:0]  datab_man_not_zero;
219 | 	wire  exp_is_inf;
220 | 	wire  exp_is_zero;
221 | 	wire  [9:0]  expmod;
222 | 	wire  [7:0]  inf_num;
223 | 	wire  lsb_bit;
224 | 	wire  [23:0]  man_result_round;
225 | 	wire  [24:0]  man_shift_full;
226 | 	wire  [7:0]  result_exp_all_one;
227 | 	wire  [8:0]  result_exp_not_zero;
228 | 	wire  round_bit;
229 | 	wire  round_carry;
230 | 	wire  [22:0]  sticky_bit;
231 | 
232 | 	// synopsys translate_off
233 | 	initial
234 | 		dataa_exp_all_one_ff_p1 = 0;
235 | 	// synopsys translate_on
236 | 	always @ ( posedge clock or  posedge aclr)
237 | 		if (aclr == 1'b1) dataa_exp_all_one_ff_p1 <= 1'b0;
238 | 		else if  (clk_en == 1'b1)   dataa_exp_all_one_ff_p1 <= dataa_exp_all_one[7];
239 | 	// synopsys translate_off
240 | 	initial
241 | 		dataa_exp_not_zero_ff_p1 = 0;
242 | 	// synopsys translate_on
243 | 	always @ ( posedge clock or  posedge aclr)
244 | 		if (aclr == 1'b1) dataa_exp_not_zero_ff_p1 <= 1'b0;
245 | 		else if  (clk_en == 1'b1)   dataa_exp_not_zero_ff_p1 <= dataa_exp_not_zero[7];
246 | 	// synopsys translate_off
247 | 	initial
248 | 		dataa_man_not_zero_ff_p1 = 0;
249 | 	// synopsys translate_on
250 | 	always @ ( posedge clock or  posedge aclr)
251 | 		if (aclr == 1'b1) dataa_man_not_zero_ff_p1 <= 1'b0;
252 | 		else if  (clk_en == 1'b1)   dataa_man_not_zero_ff_p1 <= dataa_man_not_zero[10];
253 | 	// synopsys translate_off
254 | 	initial
255 | 		dataa_man_not_zero_ff_p2 = 0;
256 | 	// synopsys translate_on
257 | 	always @ ( posedge clock or  posedge aclr)
258 | 		if (aclr == 1'b1) dataa_man_not_zero_ff_p2 <= 1'b0;
259 | 		else if  (clk_en == 1'b1)   dataa_man_not_zero_ff_p2 <= dataa_man_not_zero[22];
260 | 	// synopsys translate_off
261 | 	initial
262 | 		datab_exp_all_one_ff_p1 = 0;
263 | 	// synopsys translate_on
264 | 	always @ ( posedge clock or  posedge aclr)
265 | 		if (aclr == 1'b1) datab_exp_all_one_ff_p1 <= 1'b0;
266 | 		else if  (clk_en == 1'b1)   datab_exp_all_one_ff_p1 <= datab_exp_all_one[7];
267 | 	// synopsys translate_off
268 | 	initial
269 | 		datab_exp_not_zero_ff_p1 = 0;
270 | 	// synopsys translate_on
271 | 	always @ ( posedge clock or  posedge aclr)
272 | 		if (aclr == 1'b1) datab_exp_not_zero_ff_p1 <= 1'b0;
273 | 		else if  (clk_en == 1'b1)   datab_exp_not_zero_ff_p1 <= datab_exp_not_zero[7];
274 | 	// synopsys translate_off
275 | 	initial
276 | 		datab_man_not_zero_ff_p1 = 0;
277 | 	// synopsys translate_on
278 | 	always @ ( posedge clock or  posedge aclr)
279 | 		if (aclr == 1'b1) datab_man_not_zero_ff_p1 <= 1'b0;
280 | 		else if  (clk_en == 1'b1)   datab_man_not_zero_ff_p1 <= datab_man_not_zero[10];
281 | 	// synopsys translate_off
282 | 	initial
283 | 		datab_man_not_zero_ff_p2 = 0;
284 | 	// synopsys translate_on
285 | 	always @ ( posedge clock or  posedge aclr)
286 | 		if (aclr == 1'b1) datab_man_not_zero_ff_p2 <= 1'b0;
287 | 		else if  (clk_en == 1'b1)   datab_man_not_zero_ff_p2 <= datab_man_not_zero[22];
288 | 	// synopsys translate_off
289 | 	initial
290 | 		delay_exp2_bias = 0;
291 | 	// synopsys translate_on
292 | 	always @ ( posedge clock or  posedge aclr)
293 | 		if (aclr == 1'b1) delay_exp2_bias <= 10'b0;
294 | 		else if  (clk_en == 1'b1)   delay_exp2_bias <= delay_exp_bias;
295 | 	// synopsys translate_off
296 | 	initial
297 | 		delay_exp_bias = 0;
298 | 	// synopsys translate_on
299 | 	always @ ( posedge clock or  posedge aclr)
300 | 		if (aclr == 1'b1) delay_exp_bias <= 10'b0;
301 | 		else if  (clk_en == 1'b1)   delay_exp_bias <= wire_exp_bias_subtr_result;
302 | 	// synopsys translate_off
303 | 	initial
304 | 		delay_man_product_msb = 0;
305 | 	// synopsys translate_on
306 | 	always @ ( posedge clock or  posedge aclr)
307 | 		if (aclr == 1'b1) delay_man_product_msb <= 1'b0;
308 | 		else if  (clk_en == 1'b1)   delay_man_product_msb <= delay_man_product_msb_p0;
309 | 	// synopsys translate_off
310 | 	initial
311 | 		delay_man_product_msb_p0 = 0;
312 | 	// synopsys translate_on
313 | 	always @ ( posedge clock or  posedge aclr)
314 | 		if (aclr == 1'b1) delay_man_product_msb_p0 <= 1'b0;
315 | 		else if  (clk_en == 1'b1)   delay_man_product_msb_p0 <= wire_man_product2_mult_result[47];
316 | 	// synopsys translate_off
317 | 	initial
318 | 		exp_add_p1 = 0;
319 | 	// synopsys translate_on
320 | 	always @ ( posedge clock or  posedge aclr)
321 | 		if (aclr == 1'b1) exp_add_p1 <= 9'b0;
322 | 		else if  (clk_en == 1'b1)   exp_add_p1 <= wire_exp_add_adder_result;
323 | 	// synopsys translate_off
324 | 	initial
325 | 		exp_result_ff = 0;
326 | 	// synopsys translate_on
327 | 	always @ ( posedge clock or  posedge aclr)
328 | 		if (aclr == 1'b1) exp_result_ff <= 8'b0;
329 | 		else if  (clk_en == 1'b1)   exp_result_ff <= ((inf_num & {8{((exp_is_inf | input_is_infinity_ff1) | input_is_nan_ff1)}}) | ((wire_exp_adj_adder_result[7:0] & {8{(~ exp_is_zero)}}) & {8{input_not_zero_ff1}}));
330 | 	// synopsys translate_off
331 | 	initial
332 | 		input_is_infinity_dffe_0 = 0;
333 | 	// synopsys translate_on
334 | 	always @ ( posedge clock or  posedge aclr)
335 | 		if (aclr == 1'b1) input_is_infinity_dffe_0 <= 1'b0;
336 | 		else if  (clk_en == 1'b1)   input_is_infinity_dffe_0 <= ((dataa_exp_all_one_ff_p1 & (~ (dataa_man_not_zero_ff_p1 | dataa_man_not_zero_ff_p2))) | (datab_exp_all_one_ff_p1 & (~ (datab_man_not_zero_ff_p1 | datab_man_not_zero_ff_p2))));
337 | 	// synopsys translate_off
338 | 	initial
339 | 		input_is_infinity_dffe_1 = 0;
340 | 	// synopsys translate_on
341 | 	always @ ( posedge clock or  posedge aclr)
342 | 		if (aclr == 1'b1) input_is_infinity_dffe_1 <= 1'b0;
343 | 		else if  (clk_en == 1'b1)   input_is_infinity_dffe_1 <= input_is_infinity_dffe_0;
344 | 	// synopsys translate_off
345 | 	initial
346 | 		input_is_infinity_ff1 = 0;
347 | 	// synopsys translate_on
348 | 	always @ ( posedge clock or  posedge aclr)
349 | 		if (aclr == 1'b1) input_is_infinity_ff1 <= 1'b0;
350 | 		else if  (clk_en == 1'b1)   input_is_infinity_ff1 <= input_is_infinity_dffe_1;
351 | 	// synopsys translate_off
352 | 	initial
353 | 		input_is_nan_dffe_0 = 0;
354 | 	// synopsys translate_on
355 | 	always @ ( posedge clock or  posedge aclr)
356 | 		if (aclr == 1'b1) input_is_nan_dffe_0 <= 1'b0;
357 | 		else if  (clk_en == 1'b1)   input_is_nan_dffe_0 <= ((dataa_exp_all_one_ff_p1 & (dataa_man_not_zero_ff_p1 | dataa_man_not_zero_ff_p2)) | (datab_exp_all_one_ff_p1 & (datab_man_not_zero_ff_p1 | datab_man_not_zero_ff_p2)));
358 | 	// synopsys translate_off
359 | 	initial
360 | 		input_is_nan_dffe_1 = 0;
361 | 	// synopsys translate_on
362 | 	always @ ( posedge clock or  posedge aclr)
363 | 		if (aclr == 1'b1) input_is_nan_dffe_1 <= 1'b0;
364 | 		else if  (clk_en == 1'b1)   input_is_nan_dffe_1 <= input_is_nan_dffe_0;
365 | 	// synopsys translate_off
366 | 	initial
367 | 		input_is_nan_ff1 = 0;
368 | 	// synopsys translate_on
369 | 	always @ ( posedge clock or  posedge aclr)
370 | 		if (aclr == 1'b1) input_is_nan_ff1 <= 1'b0;
371 | 		else if  (clk_en == 1'b1)   input_is_nan_ff1 <= input_is_nan_dffe_1;
372 | 	// synopsys translate_off
373 | 	initial
374 | 		input_not_zero_dffe_0 = 0;
375 | 	// synopsys translate_on
376 | 	always @ ( posedge clock or  posedge aclr)
377 | 		if (aclr == 1'b1) input_not_zero_dffe_0 <= 1'b0;
378 | 		else if  (clk_en == 1'b1)   input_not_zero_dffe_0 <= (dataa_exp_not_zero_ff_p1 & datab_exp_not_zero_ff_p1);
379 | 	// synopsys translate_off
380 | 	initial
381 | 		input_not_zero_dffe_1 = 0;
382 | 	// synopsys translate_on
383 | 	always @ ( posedge clock or  posedge aclr)
384 | 		if (aclr == 1'b1) input_not_zero_dffe_1 <= 1'b0;
385 | 		else if  (clk_en == 1'b1)   input_not_zero_dffe_1 <= input_not_zero_dffe_0;
386 | 	// synopsys translate_off
387 | 	initial
388 | 		input_not_zero_ff1 = 0;
389 | 	// synopsys translate_on
390 | 	always @ ( posedge clock or  posedge aclr)
391 | 		if (aclr == 1'b1) input_not_zero_ff1 <= 1'b0;
392 | 		else if  (clk_en == 1'b1)   input_not_zero_ff1 <= input_not_zero_dffe_1;
393 | 	// synopsys translate_off
394 | 	initial
395 | 		lsb_dffe = 0;
396 | 	// synopsys translate_on
397 | 	always @ ( posedge clock or  posedge aclr)
398 | 		if (aclr == 1'b1) lsb_dffe <= 1'b0;
399 | 		else if  (clk_en == 1'b1)   lsb_dffe <= lsb_bit;
400 | 	// synopsys translate_off
401 | 	initial
402 | 		man_result_ff = 0;
403 | 	// synopsys translate_on
404 | 	always @ ( posedge clock or  posedge aclr)
405 | 		if (aclr == 1'b1) man_result_ff <= 23'b0;
406 | 		else if  (clk_en == 1'b1)   man_result_ff <= {((((((man_result_round[22] & input_not_zero_ff1) & (~ input_is_infinity_ff1)) & (~ exp_is_inf)) & (~ exp_is_zero)) | (input_is_infinity_ff1 & (~ input_not_zero_ff1))) | input_is_nan_ff1), (((((man_result_round[21:0] & {22{input_not_zero_ff1}}) & {22{(~ input_is_infinity_ff1)}}) & {22{(~ exp_is_inf)}}) & {22{(~ exp_is_zero)}}) & {22{(~ input_is_nan_ff1)}})};
407 | 	// synopsys translate_off
408 | 	initial
409 | 		man_round_p = 0;
410 | 	// synopsys translate_on
411 | 	always @ ( posedge clock or  posedge aclr)
412 | 		if (aclr == 1'b1) man_round_p <= 24'b0;
413 | 		else if  (clk_en == 1'b1)   man_round_p <= man_shift_full[24:1];
414 | 	// synopsys translate_off
415 | 	initial
416 | 		man_round_p2 = 0;
417 | 	// synopsys translate_on
418 | 	always @ ( posedge clock or  posedge aclr)
419 | 		if (aclr == 1'b1) man_round_p2 <= 25'b0;
420 | 		else if  (clk_en == 1'b1)   man_round_p2 <= wire_man_round_adder_result;
421 | 	// synopsys translate_off
422 | 	initial
423 | 		round_dffe = 0;
424 | 	// synopsys translate_on
425 | 	always @ ( posedge clock or  posedge aclr)
426 | 		if (aclr == 1'b1) round_dffe <= 1'b0;
427 | 		else if  (clk_en == 1'b1)   round_dffe <= round_bit;
428 | 	// synopsys translate_off
429 | 	initial
430 | 		sign_node_ff0 = 0;
431 | 	// synopsys translate_on
432 | 	always @ ( posedge clock or  posedge aclr)
433 | 		if (aclr == 1'b1) sign_node_ff0 <= 1'b0;
434 | 		else if  (clk_en == 1'b1)   sign_node_ff0 <= (dataa[31] ^ datab[31]);
435 | 	// synopsys translate_off
436 | 	initial
437 | 		sign_node_ff1 = 0;
438 | 	// synopsys translate_on
439 | 	always @ ( posedge clock or  posedge aclr)
440 | 		if (aclr == 1'b1) sign_node_ff1 <= 1'b0;
441 | 		else if  (clk_en == 1'b1)   sign_node_ff1 <= sign_node_ff0[0:0];
442 | 	// synopsys translate_off
443 | 	initial
444 | 		sign_node_ff2 = 0;
445 | 	// synopsys translate_on
446 | 	always @ ( posedge clock or  posedge aclr)
447 | 		if (aclr == 1'b1) sign_node_ff2 <= 1'b0;
448 | 		else if  (clk_en == 1'b1)   sign_node_ff2 <= sign_node_ff1[0:0];
449 | 	// synopsys translate_off
450 | 	initial
451 | 		sign_node_ff3 = 0;
452 | 	// synopsys translate_on
453 | 	always @ ( posedge clock or  posedge aclr)
454 | 		if (aclr == 1'b1) sign_node_ff3 <= 1'b0;
455 | 		else if  (clk_en == 1'b1)   sign_node_ff3 <= sign_node_ff2[0:0];
456 | 	// synopsys translate_off
457 | 	initial
458 | 		sign_node_ff4 = 0;
459 | 	// synopsys translate_on
460 | 	always @ ( posedge clock or  posedge aclr)
461 | 		if (aclr == 1'b1) sign_node_ff4 <= 1'b0;
462 | 		else if  (clk_en == 1'b1)   sign_node_ff4 <= sign_node_ff3[0:0];
463 | 	// synopsys translate_off
464 | 	initial
465 | 		sticky_dffe = 0;
466 | 	// synopsys translate_on
467 | 	always @ ( posedge clock or  posedge aclr)
468 | 		if (aclr == 1'b1) sticky_dffe <= 1'b0;
469 | 		else if  (clk_en == 1'b1)   sticky_dffe <= sticky_bit[22];
470 | 	assign
471 | 		wire_exp_add_adder_result_int = {wire_exp_add_adder_dataa, wire_exp_add_adder_cin} + {wire_exp_add_adder_datab, wire_exp_add_adder_cin};
472 | 	//synopsys translate_off
473 | 	initial
474 | 		wire_exp_add_adder_pipeline_dffe_Q = 0;
475 | 	//synopsys translate_on
476 | 	always @(posedge wire_exp_add_adder_clock or posedge wire_exp_add_adder_aclr)
477 | 		if (wire_exp_add_adder_aclr == 1'b1) wire_exp_add_adder_pipeline_dffe_Q <= 9'b0;
478 | 		else if (wire_exp_add_adder_clken == 1'b1) wire_exp_add_adder_pipeline_dffe_Q <= wire_exp_add_adder_pipeline_dffe_D;
479 | 	assign
480 | 		wire_exp_add_adder_result = wire_exp_add_adder_pipeline_dffe_Q[8:0],
481 | 		wire_exp_add_adder_pipeline_dffe_D[8:0] = wire_exp_add_adder_result_int[9:1];
482 | 	assign
483 | 		wire_exp_add_adder_aclr = aclr,
484 | 		wire_exp_add_adder_cin = 1'b0,
485 | 		wire_exp_add_adder_clken = clk_en,
486 | 		wire_exp_add_adder_clock = clock,
487 | 		wire_exp_add_adder_dataa = {1'b0, dataa[30:23]},
488 | 		wire_exp_add_adder_datab = {1'b0, datab[30:23]};
489 | 	assign
490 | 		wire_exp_adj_adder_result_int = {wire_exp_adj_adder_dataa, wire_exp_adj_adder_cin} + {wire_exp_adj_adder_datab, wire_exp_adj_adder_cin};
491 | 	assign
492 | 		wire_exp_adj_adder_result = wire_exp_adj_adder_result_int[10:1];
493 | 	assign
494 | 		wire_exp_adj_adder_cin = 1'b0,
495 | 		wire_exp_adj_adder_dataa = delay_exp2_bias,
496 | 		wire_exp_adj_adder_datab = expmod;
497 | 	assign
498 | 		wire_exp_bias_subtr_result = wire_exp_bias_subtr_dataa - wire_exp_bias_subtr_datab;
499 | 	assign
500 | 		wire_exp_bias_subtr_dataa = {1'b0, exp_add_p1[8:0]},
501 | 		wire_exp_bias_subtr_datab = {bias[9:0]};
502 | 	assign
503 | 		wire_man_round_adder_result = wire_man_round_adder_dataa + wire_man_round_adder_datab;
504 | 	assign
505 | 		wire_man_round_adder_dataa = {1'b0, man_round_p},
506 | 		wire_man_round_adder_datab = {{24{1'b0}}, round_carry};
507 | 	float_mult_mult   man_product2_mult
508 | 	( 
509 | 	.aclr(aclr),
510 | 	.clken(clk_en),
511 | 	.clock(clock),
512 | 	.dataa({1'b1, dataa[22:0]}),
513 | 	.datab({1'b1, datab[22:0]}),
514 | 	.result(wire_man_product2_mult_result));
515 | 	assign
516 | 		aclr = 1'b0,
517 | 		bias = {{3{1'b0}}, {7{1'b1}}},
518 | 		dataa_exp_all_one = {(dataa[30] & dataa_exp_all_one[6]), (dataa[29] & dataa_exp_all_one[5]), (dataa[28] & dataa_exp_all_one[4]), (dataa[27] & dataa_exp_all_one[3]), (dataa[26] & dataa_exp_all_one[2]), (dataa[25] & dataa_exp_all_one[1]), (dataa[24] & dataa_exp_all_one[0]), dataa[23]},
519 | 		dataa_exp_not_zero = {(dataa[30] | dataa_exp_not_zero[6]), (dataa[29] | dataa_exp_not_zero[5]), (dataa[28] | dataa_exp_not_zero[4]), (dataa[27] | dataa_exp_not_zero[3]), (dataa[26] | dataa_exp_not_zero[2]), (dataa[25] | dataa_exp_not_zero[1]), (dataa[24] | dataa_exp_not_zero[0]), dataa[23]},
520 | 		dataa_man_not_zero = {(dataa[22] | dataa_man_not_zero[21]), (dataa[21] | dataa_man_not_zero[20]), (dataa[20] | dataa_man_not_zero[19]), (dataa[19] | dataa_man_not_zero[18]), (dataa[18] | dataa_man_not_zero[17]), (dataa[17] | dataa_man_not_zero[16]), (dataa[16] | dataa_man_not_zero[15]), (dataa[15] | dataa_man_not_zero[14]), (dataa[14] | dataa_man_not_zero[13]), (dataa[13] | dataa_man_not_zero[12]), (dataa[12] | dataa_man_not_zero[11]), dataa[11], (dataa[10] | dataa_man_not_zero[9]), (dataa[9] | dataa_man_not_zero[8]), (dataa[8] | dataa_man_not_zero[7]), (dataa[7] | dataa_man_not_zero[6]), (dataa[6] | dataa_man_not_zero[5]), (dataa[5] | dataa_man_not_zero[4]), (dataa[4] | dataa_man_not_zero[3]), (dataa[3] | dataa_man_not_zero[2]), (dataa[2] | dataa_man_not_zero[1]), (dataa[1] | dataa_man_not_zero[0]), dataa[0]},
521 | 		datab_exp_all_one = {(datab[30] & datab_exp_all_one[6]), (datab[29] & datab_exp_all_one[5]), (datab[28] & datab_exp_all_one[4]), (datab[27] & datab_exp_all_one[3]), (datab[26] & datab_exp_all_one[2]), (datab[25] & datab_exp_all_one[1]), (datab[24] & datab_exp_all_one[0]), datab[23]},
522 | 		datab_exp_not_zero = {(datab[30] | datab_exp_not_zero[6]), (datab[29] | datab_exp_not_zero[5]), (datab[28] | datab_exp_not_zero[4]), (datab[27] | datab_exp_not_zero[3]), (datab[26] | datab_exp_not_zero[2]), (datab[25] | datab_exp_not_zero[1]), (datab[24] | datab_exp_not_zero[0]), datab[23]},
523 | 		datab_man_not_zero = {(datab[22] | datab_man_not_zero[21]), (datab[21] | datab_man_not_zero[20]), (datab[20] | datab_man_not_zero[19]), (datab[19] | datab_man_not_zero[18]), (datab[18] | datab_man_not_zero[17]), (datab[17] | datab_man_not_zero[16]), (datab[16] | datab_man_not_zero[15]), (datab[15] | datab_man_not_zero[14]), (datab[14] | datab_man_not_zero[13]), (datab[13] | datab_man_not_zero[12]), (datab[12] | datab_man_not_zero[11]), datab[11], (datab[10] | datab_man_not_zero[9]), (datab[9] | datab_man_not_zero[8]), (datab[8] | datab_man_not_zero[7]), (datab[7] | datab_man_not_zero[6]), (datab[6] | datab_man_not_zero[5]), (datab[5] | datab_man_not_zero[4]), (datab[4] | datab_man_not_zero[3]), (datab[3] | datab_man_not_zero[2]), (datab[2] | datab_man_not_zero[1]), (datab[1] | datab_man_not_zero[0]), datab[0]},
524 | 		exp_is_inf = (((~ wire_exp_adj_adder_result[9]) & wire_exp_adj_adder_result[8]) | ((~ wire_exp_adj_adder_result[8]) & result_exp_all_one[7])),
525 | 		exp_is_zero = (wire_exp_adj_adder_result[9] | (~ result_exp_not_zero[8])),
526 | 		expmod = {{8{1'b0}}, (delay_man_product_msb & man_round_p2[24]), (delay_man_product_msb ^ man_round_p2[24])},
527 | 		inf_num = {8{1'b1}},
528 | 		lsb_bit = man_shift_full[1],
529 | 		man_result_round = ((man_round_p2[23:0] & {24{(~ man_round_p2[24])}}) | (man_round_p2[24:1] & {24{man_round_p2[24]}})),
530 | 		man_shift_full = ((wire_man_product2_mult_result[46:22] & {25{(~ wire_man_product2_mult_result[47])}}) | (wire_man_product2_mult_result[47:23] & {25{wire_man_product2_mult_result[47]}})),
531 | 		result = {sign_node_ff4[0:0], exp_result_ff[7:0], man_result_ff[22:0]},
532 | 		result_exp_all_one = {(result_exp_all_one[6] & wire_exp_adj_adder_result[7]), (result_exp_all_one[5] & wire_exp_adj_adder_result[6]), (result_exp_all_one[4] & wire_exp_adj_adder_result[5]), (result_exp_all_one[3] & wire_exp_adj_adder_result[4]), (result_exp_all_one[2] & wire_exp_adj_adder_result[3]), (result_exp_all_one[1] & wire_exp_adj_adder_result[2]), (result_exp_all_one[0] & wire_exp_adj_adder_result[1]), wire_exp_adj_adder_result[0]},
533 | 		result_exp_not_zero = {(result_exp_not_zero[7] | wire_exp_adj_adder_result[8]), (result_exp_not_zero[6] | wire_exp_adj_adder_result[7]), (result_exp_not_zero[5] | wire_exp_adj_adder_result[6]), (result_exp_not_zero[4] | wire_exp_adj_adder_result[5]), (result_exp_not_zero[3] | wire_exp_adj_adder_result[4]), (result_exp_not_zero[2] | wire_exp_adj_adder_result[3]), (result_exp_not_zero[1] | wire_exp_adj_adder_result[2]), (result_exp_not_zero[0] | wire_exp_adj_adder_result[1]), wire_exp_adj_adder_result[0]},
534 | 		round_bit = man_shift_full[0],
535 | 		round_carry = (round_dffe & (lsb_dffe | sticky_dffe)),
536 | 		sticky_bit = {(sticky_bit[21] | (wire_man_product2_mult_result[47] & wire_man_product2_mult_result[22])), (sticky_bit[20] | wire_man_product2_mult_result[21]), (sticky_bit[19] | wire_man_product2_mult_result[20]), (sticky_bit[18] | wire_man_product2_mult_result[19]), (sticky_bit[17] | wire_man_product2_mult_result[18]), (sticky_bit[16] | wire_man_product2_mult_result[17]), (sticky_bit[15] | wire_man_product2_mult_result[16]), (sticky_bit[14] | wire_man_product2_mult_result[15]), (sticky_bit[13] | wire_man_product2_mult_result[14]), (sticky_bit[12] | wire_man_product2_mult_result[13]), (sticky_bit[11] | wire_man_product2_mult_result[12]), (sticky_bit[10] | wire_man_product2_mult_result[11]), (sticky_bit[9] | wire_man_product2_mult_result[10]), (sticky_bit[8] | wire_man_product2_mult_result[9]), (sticky_bit[7] | wire_man_product2_mult_result[8]), (sticky_bit[6] | wire_man_product2_mult_result[7]), (sticky_bit[5] | wire_man_product2_mult_result[6]), (sticky_bit[4] | wire_man_product2_mult_result[5]), (sticky_bit[3] | wire_man_product2_mult_result[4]), (sticky_bit[2] | wire_man_product2_mult_result[3]), (sticky_bit[1] | wire_man_product2_mult_result[2]), (sticky_bit[0] | wire_man_product2_mult_result[1]), wire_man_product2_mult_result[0]};
537 | endmodule //float_mult_altfp_mult
538 | //VALID FILE
539 | 
540 | 
541 | // synopsys translate_off
542 | `timescale 1 ps / 1 ps
543 | // synopsys translate_on
// float_mult
//
// Thin top-level wrapper around the float_mult_altfp_mult core defined
// earlier in this file. It exposes only the clock, clock-enable, the two
// 32-bit operands and the 32-bit result; every signal is passed straight
// through to the core without modification.
//
// The CNX retrieval info at the end of this file records the variation as
// LPM_TYPE "altfp_mult" with WIDTH_EXP 8, WIDTH_MAN 23, PIPELINE 5 and
// ROUNDING "TO_NEAREST".
//
// Ports:
//   clk_en - clock enable forwarded to the core
//   clock  - clock forwarded to the core
//   dataa  - first 32-bit operand
//   datab  - second 32-bit operand
//   result - 32-bit result produced by the core
module float_mult (
	input  wire        clk_en,
	input  wire        clock,
	input  wire [31:0] dataa,
	input  wire [31:0] datab,
	output wire [31:0] result
) /* synthesis synthesis_clearbox = 1 */;

	// Carries the core's output before it is handed to the module port.
	wire [31:0] core_result;

	float_mult_altfp_mult float_mult_altfp_mult_component (
		.result (core_result),
		.datab  (datab),
		.dataa  (dataa),
		.clock  (clock),
		.clk_en (clk_en)
	);

	// Forward all 32 bits of the core result unchanged.
	assign result = core_result[31:0];

endmodule
568 | 
569 | // ============================================================
570 | // CNX file retrieval info
571 | // ============================================================
572 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
573 | // Retrieval info: PRIVATE: FPM_FORMAT STRING "Single"
574 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
575 | // Retrieval info: CONSTANT: DEDICATED_MULTIPLIER_CIRCUITRY STRING "YES"
576 | // Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
577 | // Retrieval info: CONSTANT: EXCEPTION_HANDLING STRING "NO"
578 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "UNUSED"
579 | // Retrieval info: CONSTANT: LPM_HINT STRING "UNUSED"
580 | // Retrieval info: CONSTANT: LPM_TYPE STRING "altfp_mult"
581 | // Retrieval info: CONSTANT: PIPELINE NUMERIC "5"
582 | // Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
583 | // Retrieval info: CONSTANT: ROUNDING STRING "TO_NEAREST"
584 | // Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
585 | // Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
586 | // Retrieval info: USED_PORT: clk_en 0 0 0 0 INPUT NODEFVAL "clk_en"
587 | // Retrieval info: CONNECT: @clk_en 0 0 0 0 clk_en 0 0 0 0
588 | // Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
589 | // Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
590 | // Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
591 | // Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
592 | // Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
593 | // Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
594 | // Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
595 | // Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
596 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.v TRUE FALSE
597 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.qip TRUE FALSE
598 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.bsf TRUE TRUE
599 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_inst.v TRUE TRUE
600 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult_bb.v TRUE TRUE
601 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.inc TRUE TRUE
602 | // Retrieval info: GEN_FILE: TYPE_NORMAL float_mult.cmp TRUE TRUE
603 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX NUMERIC "1"
604 | // Retrieval info: LIB_FILE: lpm
605 | 


--------------------------------------------------------------------------------
/rtl/qip/iplauncher_debug.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brianhill11/FPGA-CNN/23f4b55a8d7acdb33eaa3584de8e833e1448f8f6/rtl/qip/iplauncher_debug.log


--------------------------------------------------------------------------------
/rtl/qip/ram_2p.qip:
--------------------------------------------------------------------------------
# Quartus IP file for the ram_2p variation.
# The first two assignments record the tool that produced the IP and its version.
set_global_assignment -name IP_TOOL_NAME "RAM: 2-PORT"
set_global_assignment -name IP_TOOL_VERSION "13.1"
# ram_2p.v is registered as a Verilog source file; the path is built by joining
# $::quartus(qip_path) with the file name.
set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "ram_2p.v"]
# ram_2p_bb.v is registered as a MISC_FILE rather than a Verilog source file.
set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "ram_2p_bb.v"]
5 | 


--------------------------------------------------------------------------------
/rtl/qip/ram_2p.v:
--------------------------------------------------------------------------------
  1 | // megafunction wizard: %RAM: 2-PORT%
  2 | // GENERATION: STANDARD
  3 | // VERSION: WM1.0
  4 | // MODULE: altsyncram 
  5 | 
  6 | // ============================================================
  7 | // File Name: ram_2p.v
  8 | // Megafunction Name(s):
  9 | // 			altsyncram
 10 | //
 11 | // Simulation Library Files(s):
 12 | // 			altera_mf
 13 | // ============================================================
 14 | // ************************************************************
 15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
 16 | //
 17 | // 13.1.4 Build 182 03/12/2014 Patches 4.26 SJ Full Version
 18 | // ************************************************************
 19 | 
 20 | 
 21 | //Copyright (C) 1991-2014 Altera Corporation
 22 | //Your use of Altera Corporation's design tools, logic functions 
 23 | //and other software and tools, and its AMPP partner logic 
 24 | //functions, and any output files from any of the foregoing 
 25 | //(including device programming or simulation files), and any 
 26 | //associated documentation or information are expressly subject 
 27 | //to the terms and conditions of the Altera Program License 
 28 | //Subscription Agreement, Altera MegaCore Function License 
 29 | //Agreement, or other applicable license agreement, including, 
 30 | //without limitation, that your use is for the sole purpose of 
 31 | //programming logic devices manufactured by Altera and sold by 
 32 | //Altera or its authorized distributors.  Please refer to the 
 33 | //applicable agreement for further details.
 34 | 
 35 | 
 36 | // synopsys translate_off
 37 | `timescale 1 ps / 1 ps
 38 | // synopsys translate_on
// ram_2p
//
// Wizard-generated wrapper around a single altsyncram instance configured
// with operation_mode = "DUAL_PORT". Port A of the altsyncram is wired to
// the write-side signals (data / wraddress / wren, clocked by wrclock) and
// port B to the read-side signals (rdaddress / q, clocked by rdclock).
//
// Geometry, as set by the defparams below: 256 words (numwords_a/b = 256,
// widthad_a/b = 8) of 256 bits each (width_a/b = 256).
//
// Ports:
//   data      - 256-bit write data (altsyncram data_a)
//   rdaddress - 8-bit read address (altsyncram address_b)
//   rdclock   - read clock (altsyncram clock1)
//   wraddress - 8-bit write address (altsyncram address_a)
//   wrclock   - write clock (altsyncram clock0)
//   wren      - write enable (altsyncram wren_a)
//   q         - 256-bit read data (altsyncram q_b)
module ram_2p (
	data,
	rdaddress,
	rdclock,
	wraddress,
	wrclock,
	wren,
	q);

	input	[255:0]  data;
	input	[7:0]  rdaddress;
	input	  rdclock;
	input	[7:0]  wraddress;
	input	  wrclock;
	input	  wren;
	output	[255:0]  q;
// The tri1/tri0 net declarations below give wrclock a pull-to-1 and wren a
// pull-to-0 when left undriven. Unless ALTERA_RESERVED_QIS is defined, they
// are enclosed in translate_off/translate_on pragmas.
`ifndef ALTERA_RESERVED_QIS
// synopsys translate_off
`endif
	tri1	  wrclock;
	tri0	  wren;
`ifndef ALTERA_RESERVED_QIS
// synopsys translate_on
`endif

	// sub_wire0 carries the altsyncram port-B read data; q is a direct copy.
	wire [255:0] sub_wire0;
	wire [255:0] q = sub_wire0[255:0];

	// Port A = write side, port B = read side. Every other altsyncram port
	// is tied to a constant or left unconnected.
	altsyncram	altsyncram_component (
				.address_a (wraddress),
				.clock0 (wrclock),
				.data_a (data),
				.wren_a (wren),
				.address_b (rdaddress),
				.clock1 (rdclock),
				.q_b (sub_wire0),
				.aclr0 (1'b0),
				.aclr1 (1'b0),
				.addressstall_a (1'b0),
				.addressstall_b (1'b0),
				.byteena_a (1'b1),
				.byteena_b (1'b1),
				.clocken0 (1'b1),
				.clocken1 (1'b1),
				.clocken2 (1'b1),
				.clocken3 (1'b1),
				.data_b ({256{1'b1}}),
				.eccstatus (),
				.q_a (),
				.rden_a (1'b1),
				.rden_b (1'b1),
				.wren_b (1'b0));
	// Instance configuration. Both the port-B address register and the
	// port-B output register are assigned to CLOCK1, which is connected to
	// rdclock above; no asynchronous clears are used on port B and the
	// clock-enable inputs are bypassed.
	defparam
		altsyncram_component.address_aclr_b = "NONE",
		altsyncram_component.address_reg_b = "CLOCK1",
		altsyncram_component.clock_enable_input_a = "BYPASS",
		altsyncram_component.clock_enable_input_b = "BYPASS",
		altsyncram_component.clock_enable_output_b = "BYPASS",
		altsyncram_component.intended_device_family = "Stratix V",
		altsyncram_component.lpm_type = "altsyncram",
		altsyncram_component.numwords_a = 256,
		altsyncram_component.numwords_b = 256,
		altsyncram_component.operation_mode = "DUAL_PORT",
		altsyncram_component.outdata_aclr_b = "NONE",
		altsyncram_component.outdata_reg_b = "CLOCK1",
		altsyncram_component.power_up_uninitialized = "FALSE",
		altsyncram_component.widthad_a = 8,
		altsyncram_component.widthad_b = 8,
		altsyncram_component.width_a = 256,
		altsyncram_component.width_b = 256,
		altsyncram_component.width_byteena_a = 1;


endmodule
113 | 
114 | // ============================================================
115 | // CNX file retrieval info
116 | // ============================================================
117 | // Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
118 | // Retrieval info: PRIVATE: ADDRESSSTALL_B NUMERIC "0"
119 | // Retrieval info: PRIVATE: BYTEENA_ACLR_A NUMERIC "0"
120 | // Retrieval info: PRIVATE: BYTEENA_ACLR_B NUMERIC "0"
121 | // Retrieval info: PRIVATE: BYTE_ENABLE_A NUMERIC "0"
122 | // Retrieval info: PRIVATE: BYTE_ENABLE_B NUMERIC "0"
123 | // Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
124 | // Retrieval info: PRIVATE: BlankMemory NUMERIC "1"
125 | // Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
126 | // Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_B NUMERIC "0"
127 | // Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
128 | // Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_B NUMERIC "0"
129 | // Retrieval info: PRIVATE: CLRdata NUMERIC "0"
130 | // Retrieval info: PRIVATE: CLRq NUMERIC "0"
131 | // Retrieval info: PRIVATE: CLRrdaddress NUMERIC "0"
132 | // Retrieval info: PRIVATE: CLRrren NUMERIC "0"
133 | // Retrieval info: PRIVATE: CLRwraddress NUMERIC "0"
134 | // Retrieval info: PRIVATE: CLRwren NUMERIC "0"
135 | // Retrieval info: PRIVATE: Clock NUMERIC "1"
136 | // Retrieval info: PRIVATE: Clock_A NUMERIC "0"
137 | // Retrieval info: PRIVATE: Clock_B NUMERIC "0"
138 | // Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
139 | // Retrieval info: PRIVATE: INDATA_ACLR_B NUMERIC "0"
140 | // Retrieval info: PRIVATE: INDATA_REG_B NUMERIC "0"
141 | // Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_B"
142 | // Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0"
143 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
144 | // Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "0"
145 | // Retrieval info: PRIVATE: JTAG_ID STRING "NONE"
146 | // Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0"
147 | // Retrieval info: PRIVATE: MEMSIZE NUMERIC "65536"
148 | // Retrieval info: PRIVATE: MEM_IN_BITS NUMERIC "0"
149 | // Retrieval info: PRIVATE: MIFfilename STRING ""
150 | // Retrieval info: PRIVATE: OPERATION_MODE NUMERIC "2"
151 | // Retrieval info: PRIVATE: OUTDATA_ACLR_B NUMERIC "0"
152 | // Retrieval info: PRIVATE: OUTDATA_REG_B NUMERIC "1"
153 | // Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0"
154 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_MIXED_PORTS NUMERIC "2"
155 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "3"
156 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_B NUMERIC "3"
157 | // Retrieval info: PRIVATE: REGdata NUMERIC "1"
158 | // Retrieval info: PRIVATE: REGq NUMERIC "1"
159 | // Retrieval info: PRIVATE: REGrdaddress NUMERIC "1"
160 | // Retrieval info: PRIVATE: REGrren NUMERIC "1"
161 | // Retrieval info: PRIVATE: REGwraddress NUMERIC "1"
162 | // Retrieval info: PRIVATE: REGwren NUMERIC "1"
163 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
164 | // Retrieval info: PRIVATE: USE_DIFF_CLKEN NUMERIC "0"
165 | // Retrieval info: PRIVATE: UseDPRAM NUMERIC "1"
166 | // Retrieval info: PRIVATE: VarWidth NUMERIC "0"
167 | // Retrieval info: PRIVATE: WIDTH_READ_A NUMERIC "256"
168 | // Retrieval info: PRIVATE: WIDTH_READ_B NUMERIC "256"
169 | // Retrieval info: PRIVATE: WIDTH_WRITE_A NUMERIC "256"
170 | // Retrieval info: PRIVATE: WIDTH_WRITE_B NUMERIC "256"
171 | // Retrieval info: PRIVATE: WRADDR_ACLR_B NUMERIC "0"
172 | // Retrieval info: PRIVATE: WRADDR_REG_B NUMERIC "0"
173 | // Retrieval info: PRIVATE: WRCTRL_ACLR_B NUMERIC "0"
174 | // Retrieval info: PRIVATE: enable NUMERIC "0"
175 | // Retrieval info: PRIVATE: rden NUMERIC "0"
176 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
177 | // Retrieval info: CONSTANT: ADDRESS_ACLR_B STRING "NONE"
178 | // Retrieval info: CONSTANT: ADDRESS_REG_B STRING "CLOCK1"
179 | // Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS"
180 | // Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_B STRING "BYPASS"
181 | // Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_B STRING "BYPASS"
182 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V"
183 | // Retrieval info: CONSTANT: LPM_TYPE STRING "altsyncram"
184 | // Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "256"
185 | // Retrieval info: CONSTANT: NUMWORDS_B NUMERIC "256"
186 | // Retrieval info: CONSTANT: OPERATION_MODE STRING "DUAL_PORT"
187 | // Retrieval info: CONSTANT: OUTDATA_ACLR_B STRING "NONE"
188 | // Retrieval info: CONSTANT: OUTDATA_REG_B STRING "CLOCK1"
189 | // Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE"
190 | // Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "8"
191 | // Retrieval info: CONSTANT: WIDTHAD_B NUMERIC "8"
192 | // Retrieval info: CONSTANT: WIDTH_A NUMERIC "256"
193 | // Retrieval info: CONSTANT: WIDTH_B NUMERIC "256"
194 | // Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "1"
195 | // Retrieval info: USED_PORT: data 0 0 256 0 INPUT NODEFVAL "data[255..0]"
196 | // Retrieval info: USED_PORT: q 0 0 256 0 OUTPUT NODEFVAL "q[255..0]"
197 | // Retrieval info: USED_PORT: rdaddress 0 0 8 0 INPUT NODEFVAL "rdaddress[7..0]"
198 | // Retrieval info: USED_PORT: rdclock 0 0 0 0 INPUT NODEFVAL "rdclock"
199 | // Retrieval info: USED_PORT: wraddress 0 0 8 0 INPUT NODEFVAL "wraddress[7..0]"
200 | // Retrieval info: USED_PORT: wrclock 0 0 0 0 INPUT VCC "wrclock"
201 | // Retrieval info: USED_PORT: wren 0 0 0 0 INPUT GND "wren"
202 | // Retrieval info: CONNECT: @address_a 0 0 8 0 wraddress 0 0 8 0
203 | // Retrieval info: CONNECT: @address_b 0 0 8 0 rdaddress 0 0 8 0
204 | // Retrieval info: CONNECT: @clock0 0 0 0 0 wrclock 0 0 0 0
205 | // Retrieval info: CONNECT: @clock1 0 0 0 0 rdclock 0 0 0 0
206 | // Retrieval info: CONNECT: @data_a 0 0 256 0 data 0 0 256 0
207 | // Retrieval info: CONNECT: @wren_a 0 0 0 0 wren 0 0 0 0
208 | // Retrieval info: CONNECT: q 0 0 256 0 @q_b 0 0 256 0
209 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.v TRUE
210 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.inc FALSE
211 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.cmp FALSE
212 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.bsf FALSE
213 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p_inst.v FALSE
214 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p_bb.v TRUE
215 | // Retrieval info: LIB_FILE: altera_mf
216 | 


--------------------------------------------------------------------------------
/rtl/qip/ram_2p_bb.v:
--------------------------------------------------------------------------------
  1 | // megafunction wizard: %RAM: 2-PORT%VBB%
  2 | // GENERATION: STANDARD
  3 | // VERSION: WM1.0
  4 | // MODULE: altsyncram 
  5 | 
  6 | // ============================================================
  7 | // File Name: ram_2p.v
  8 | // Megafunction Name(s):
  9 | // 			altsyncram
 10 | //
 11 | // Simulation Library Files(s):
 12 | // 			altera_mf
 13 | // ============================================================
 14 | // ************************************************************
 15 | // THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
 16 | //
 17 | // 13.1.4 Build 182 03/12/2014 Patches 4.26 SJ Full Version
 18 | // ************************************************************
 19 | 
 20 | //Copyright (C) 1991-2014 Altera Corporation
 21 | //Your use of Altera Corporation's design tools, logic functions 
 22 | //and other software and tools, and its AMPP partner logic 
 23 | //functions, and any output files from any of the foregoing 
 24 | //(including device programming or simulation files), and any 
 25 | //associated documentation or information are expressly subject 
 26 | //to the terms and conditions of the Altera Program License 
 27 | //Subscription Agreement, Altera MegaCore Function License 
 28 | //Agreement, or other applicable license agreement, including, 
 29 | //without limitation, that your use is for the sole purpose of 
 30 | //programming logic devices manufactured by Altera and sold by 
 31 | //Altera or its authorized distributors.  Please refer to the 
 32 | //applicable agreement for further details.
 33 | 
// ram_2p (black-box declaration)
//
// Port-only declaration of ram_2p: the module body contains no logic and no
// instances, only the port list, port directions/widths and the tri1/tri0
// net declarations.
//
// Ports:
//   data      - 256-bit input
//   rdaddress - 8-bit input
//   rdclock   - 1-bit input
//   wraddress - 8-bit input
//   wrclock   - 1-bit input
//   wren      - 1-bit input
//   q         - 256-bit output
module ram_2p (
	data,
	rdaddress,
	rdclock,
	wraddress,
	wrclock,
	wren,
	q);

	input	[255:0]  data;
	input	[7:0]  rdaddress;
	input	  rdclock;
	input	[7:0]  wraddress;
	input	  wrclock;
	input	  wren;
	output	[255:0]  q;
// The tri1/tri0 net declarations below give wrclock a pull-to-1 and wren a
// pull-to-0 when left undriven. Unless ALTERA_RESERVED_QIS is defined, they
// are enclosed in translate_off/translate_on pragmas.
`ifndef ALTERA_RESERVED_QIS
// synopsys translate_off
`endif
	tri1	  wrclock;
	tri0	  wren;
`ifndef ALTERA_RESERVED_QIS
// synopsys translate_on
`endif

endmodule
 60 | 
 61 | // ============================================================
 62 | // CNX file retrieval info
 63 | // ============================================================
 64 | // Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
 65 | // Retrieval info: PRIVATE: ADDRESSSTALL_B NUMERIC "0"
 66 | // Retrieval info: PRIVATE: BYTEENA_ACLR_A NUMERIC "0"
 67 | // Retrieval info: PRIVATE: BYTEENA_ACLR_B NUMERIC "0"
 68 | // Retrieval info: PRIVATE: BYTE_ENABLE_A NUMERIC "0"
 69 | // Retrieval info: PRIVATE: BYTE_ENABLE_B NUMERIC "0"
 70 | // Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
 71 | // Retrieval info: PRIVATE: BlankMemory NUMERIC "1"
 72 | // Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
 73 | // Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_B NUMERIC "0"
 74 | // Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
 75 | // Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_B NUMERIC "0"
 76 | // Retrieval info: PRIVATE: CLRdata NUMERIC "0"
 77 | // Retrieval info: PRIVATE: CLRq NUMERIC "0"
 78 | // Retrieval info: PRIVATE: CLRrdaddress NUMERIC "0"
 79 | // Retrieval info: PRIVATE: CLRrren NUMERIC "0"
 80 | // Retrieval info: PRIVATE: CLRwraddress NUMERIC "0"
 81 | // Retrieval info: PRIVATE: CLRwren NUMERIC "0"
 82 | // Retrieval info: PRIVATE: Clock NUMERIC "1"
 83 | // Retrieval info: PRIVATE: Clock_A NUMERIC "0"
 84 | // Retrieval info: PRIVATE: Clock_B NUMERIC "0"
 85 | // Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
 86 | // Retrieval info: PRIVATE: INDATA_ACLR_B NUMERIC "0"
 87 | // Retrieval info: PRIVATE: INDATA_REG_B NUMERIC "0"
 88 | // Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_B"
 89 | // Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0"
 90 | // Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
 91 | // Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "0"
 92 | // Retrieval info: PRIVATE: JTAG_ID STRING "NONE"
 93 | // Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0"
 94 | // Retrieval info: PRIVATE: MEMSIZE NUMERIC "65536"
 95 | // Retrieval info: PRIVATE: MEM_IN_BITS NUMERIC "0"
 96 | // Retrieval info: PRIVATE: MIFfilename STRING ""
 97 | // Retrieval info: PRIVATE: OPERATION_MODE NUMERIC "2"
 98 | // Retrieval info: PRIVATE: OUTDATA_ACLR_B NUMERIC "0"
 99 | // Retrieval info: PRIVATE: OUTDATA_REG_B NUMERIC "1"
100 | // Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0"
101 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_MIXED_PORTS NUMERIC "2"
102 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "3"
103 | // Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_B NUMERIC "3"
104 | // Retrieval info: PRIVATE: REGdata NUMERIC "1"
105 | // Retrieval info: PRIVATE: REGq NUMERIC "1"
106 | // Retrieval info: PRIVATE: REGrdaddress NUMERIC "1"
107 | // Retrieval info: PRIVATE: REGrren NUMERIC "1"
108 | // Retrieval info: PRIVATE: REGwraddress NUMERIC "1"
109 | // Retrieval info: PRIVATE: REGwren NUMERIC "1"
110 | // Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
111 | // Retrieval info: PRIVATE: USE_DIFF_CLKEN NUMERIC "0"
112 | // Retrieval info: PRIVATE: UseDPRAM NUMERIC "1"
113 | // Retrieval info: PRIVATE: VarWidth NUMERIC "0"
114 | // Retrieval info: PRIVATE: WIDTH_READ_A NUMERIC "256"
115 | // Retrieval info: PRIVATE: WIDTH_READ_B NUMERIC "256"
116 | // Retrieval info: PRIVATE: WIDTH_WRITE_A NUMERIC "256"
117 | // Retrieval info: PRIVATE: WIDTH_WRITE_B NUMERIC "256"
118 | // Retrieval info: PRIVATE: WRADDR_ACLR_B NUMERIC "0"
119 | // Retrieval info: PRIVATE: WRADDR_REG_B NUMERIC "0"
120 | // Retrieval info: PRIVATE: WRCTRL_ACLR_B NUMERIC "0"
121 | // Retrieval info: PRIVATE: enable NUMERIC "0"
122 | // Retrieval info: PRIVATE: rden NUMERIC "0"
123 | // Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
124 | // Retrieval info: CONSTANT: ADDRESS_ACLR_B STRING "NONE"
125 | // Retrieval info: CONSTANT: ADDRESS_REG_B STRING "CLOCK1"
126 | // Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS"
127 | // Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_B STRING "BYPASS"
128 | // Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_B STRING "BYPASS"
129 | // Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V"
130 | // Retrieval info: CONSTANT: LPM_TYPE STRING "altsyncram"
131 | // Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "256"
132 | // Retrieval info: CONSTANT: NUMWORDS_B NUMERIC "256"
133 | // Retrieval info: CONSTANT: OPERATION_MODE STRING "DUAL_PORT"
134 | // Retrieval info: CONSTANT: OUTDATA_ACLR_B STRING "NONE"
135 | // Retrieval info: CONSTANT: OUTDATA_REG_B STRING "CLOCK1"
136 | // Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE"
137 | // Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "8"
138 | // Retrieval info: CONSTANT: WIDTHAD_B NUMERIC "8"
139 | // Retrieval info: CONSTANT: WIDTH_A NUMERIC "256"
140 | // Retrieval info: CONSTANT: WIDTH_B NUMERIC "256"
141 | // Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "1"
142 | // Retrieval info: USED_PORT: data 0 0 256 0 INPUT NODEFVAL "data[255..0]"
143 | // Retrieval info: USED_PORT: q 0 0 256 0 OUTPUT NODEFVAL "q[255..0]"
144 | // Retrieval info: USED_PORT: rdaddress 0 0 8 0 INPUT NODEFVAL "rdaddress[7..0]"
145 | // Retrieval info: USED_PORT: rdclock 0 0 0 0 INPUT NODEFVAL "rdclock"
146 | // Retrieval info: USED_PORT: wraddress 0 0 8 0 INPUT NODEFVAL "wraddress[7..0]"
147 | // Retrieval info: USED_PORT: wrclock 0 0 0 0 INPUT VCC "wrclock"
148 | // Retrieval info: USED_PORT: wren 0 0 0 0 INPUT GND "wren"
149 | // Retrieval info: CONNECT: @address_a 0 0 8 0 wraddress 0 0 8 0
150 | // Retrieval info: CONNECT: @address_b 0 0 8 0 rdaddress 0 0 8 0
151 | // Retrieval info: CONNECT: @clock0 0 0 0 0 wrclock 0 0 0 0
152 | // Retrieval info: CONNECT: @clock1 0 0 0 0 rdclock 0 0 0 0
153 | // Retrieval info: CONNECT: @data_a 0 0 256 0 data 0 0 256 0
154 | // Retrieval info: CONNECT: @wren_a 0 0 0 0 wren 0 0 0 0
155 | // Retrieval info: CONNECT: q 0 0 256 0 @q_b 0 0 256 0
156 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.v TRUE
157 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.inc FALSE
158 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.cmp FALSE
159 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p.bsf FALSE
160 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p_inst.v FALSE
161 | // Retrieval info: GEN_FILE: TYPE_NORMAL ram_2p_bb.v TRUE
162 | // Retrieval info: LIB_FILE: altera_mf
163 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_layer.sv:
--------------------------------------------------------------------------------
 1 | // relu_backward_layer: WIDTH parallel relu_backward_opt lanes plus a registered copy of the id input.
 2 | module relu_backward_layer	#(parameter WIDTH = 16, parameter NEGATIVE_SLOPE = 0.0)
 3 | 									(
 4 | 										input		logic				clk,						//clock signal
 5 | 										input		logic				reset,					//reset signal, forwarded unchanged to every lane
 6 | 										input		logic	[7:0]		id,						//id value accompanying in_vec
 7 | 										input		logic	[31:0]	in_vec	[WIDTH-1:0],//vector of floats, one 32-bit word per lane
 8 | 										output	reg	[7:0]		id_out,					//output id value, loaded from id on each rising clk edge
 9 | 										output	reg	[31:0]	out_vec	[WIDTH-1:0]	//vector of floats, out_vec[i] is driven by lane i
10 | 									);
11 | 	// WIDTH sets the number of lanes; NEGATIVE_SLOPE is passed through to each relu_backward_opt.
12 | 	generate
13 | 		genvar i;
14 | 		// lane i connects in_vec[i] to out_vec[i]
15 | 			for (i = 0; i < WIDTH; i = i+1) begin : RELU_BACKWARD
16 | 				relu_backward_opt #(.NEGATIVE_SLOPE(NEGATIVE_SLOPE)) 
17 | 						relu_ops (	.clk(clk), .reset(reset), 
18 | 										.in_data(in_vec[i]), .out_data(out_vec[i]) );
19 | 			end
20 | 	endgenerate
21 | 	// id_out follows id on every rising clk edge; reset is not consulted here
22 | 	always @(posedge clk) begin
23 | 		id_out <= id;
24 | 	end
25 | endmodule
26 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_layer.sv.bak:
--------------------------------------------------------------------------------
 1 | // Backup copy (.bak) of relu_backward_layer: WIDTH parallel relu_backward_opt lanes, without any id ports.
 2 | module relu_backward_layer #(parameter WIDTH = 4, parameter NEGATIVE_SLOPE = 0)
 3 |                             ( input   logic           clk,                //clock signal
 4 |                             input   logic           reset,              //reset signal, forwarded unchanged to every lane
 5 |                             input   logic [31:0]    in_vec  [WIDTH-1:0],//vector of floats, one 32-bit word per lane
 6 |                             output  reg   [31:0]    out_vec [WIDTH-1:0] //vector of floats, out_vec[i] is driven by lane i
 7 |                           );
 8 |     //same defaults as the parameter list above, left here commented out
 9 |     //parameter NEGATIVE_SLOPE = 0;
10 |     //parameter WIDTH = 4;
11 |  
12 |     generate
13 |         genvar i;
14 |         //one (unnamed) generate iteration per element: lane i connects in_vec[i] to out_vec[i]
15 |         for (i = 0; i < WIDTH; i = i+1) begin
16 |             relu_backward_opt #(.NEGATIVE_SLOPE(NEGATIVE_SLOPE)) 
17 |                 relu_ops ( .clk(clk), .reset(reset), 
18 |                             .in_data(in_vec[i]), .out_data(out_vec[i]) );
19 |         end
20 |     endgenerate
21 | 
22 | endmodule
23 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_layer_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/100ps
 2 | //Testbench: replays the vectors from the included .vh through relu_backward_layer and reports how many test vectors matched.
 3 | module relu_backward_layer_tb();
 4 | 	`include "/home/b/FPGA-CNN/test/test_data/relu_backward_test_data.vh"
 5 | 	parameter CYCLE 			= 5;		//clk period: 5ns = 200Mhz signal
 6 | 	parameter NEG_SLOPE 		= 0.0;	//negative slope param
 7 | 	parameter WIDTH 			= 8;		//width of input/output vec
 8 | 	
 9 | 	parameter NUM_TESTS 		= 5000;	//number of test iterations
10 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH; 
11 | 
12 | 	reg clk, reset;
13 | 	reg [31:0] in_vec [WIDTH-1:0];	//input vec to module
14 | 	reg [31:0] out_vec [WIDTH-1:0];	//output vec from module
15 | 	int i, j, num_errors;
16 | 	bit vec_failed;						//set when any element of the current test vector mismatches
17 | 	//initialize clk
18 | 	initial begin
19 | 		clk = 0;
20 | 	end
21 | 	
22 | 	//forever cycle the clk
23 | 	always begin
24 | 		#(CYCLE/2.0) clk = ~clk;
25 | 	end
26 | 	
27 | 	//instantiate the module with the testbench WIDTH (id is tied off, id_out is left open)
28 | 	relu_backward_layer #(.WIDTH(WIDTH), .NEGATIVE_SLOPE(NEG_SLOPE) )
29 | 							relu( .clk(clk), .reset(reset), .id(8'b0), .in_vec(in_vec), .out_vec(out_vec) );
30 | 					
31 | 	initial begin
32 | 		reset = 0;
33 | 		num_errors = 0;
34 | 		//for all test cases
35 | 		for (i = 0; i < MEM_SIZE; i = i+(WIDTH)) begin
36 | 			vec_failed = 0;
37 | 			for (j = 0; j < WIDTH; j++) begin
38 | 				//use test input value as input
39 | 				in_vec[j] = test_input[i+j];
40 | 			end
41 | 			//wait one clock period so the registered outputs have been updated
42 | 			#(CYCLE)
43 | 			//for each value in output vector (same size as input)
44 | 			for (j = 0; j < WIDTH; j++) begin
45 | 				//check output of module against value calculated by Python
46 | 				$display("output: %h\tcalculated:%h", out_vec[j], test_output[i+j]);
47 | 				assert( out_vec[j] === test_output[i+j] );
48 | 				//4-state compare: an X/Z output bit counts as a mismatch instead of being skipped
49 | 				if( out_vec[j] !== test_output[i+j] ) vec_failed = 1;
50 | 			end
51 | 			//count each failing vector once so the tally is in units of tests
52 | 			num_errors += vec_failed;
53 | 		end
54 | 		$display("############################################\n");
55 | 		$display("Testing complete!\n");
56 | 		$display("%d of %d tests passed!\n", NUM_TESTS-num_errors, NUM_TESTS);
57 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
58 | 		$display("############################################\n");
59 | 	end
60 | endmodule
61 | 
62 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_layer_tb.sv.bak:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/1ns
 2 | //Backup (.bak) testbench for relu_backward_layer: 10 rounds of pseudo-random 32-bit patterns, checked with assertions.
 3 | module relu_backward_layer_tb();
 4 | 
 5 |     parameter CYCLE = 100;
 6 |     //use $shortrealtobits() to convert float to binary
 7 |     parameter NEG_SLOPE = $shortrealtobits(0.0001);
 8 |     parameter WIDTH = 4;
 9 | 
10 |     reg clk, reset;
11 |     shortreal a [WIDTH-1:0];    //stimulus vector, connected to the DUT's in_vec
12 |     reg [31:0] b [WIDTH-1:0];   //response vector, connected to the DUT's out_vec
13 |     reg random_sign;            //1 bit for random float sign
14 |     reg [7:0] random_exp;       //8 bits for random float exp
15 |     reg [22:0] random_mantissa; //23 bits for random float mantissa
16 |     
17 |     //forever cycle the clk (toggles every CYCLE/2 time units)
18 |     initial begin
19 |         clk <= 0;
20 |         forever begin
21 |             #(CYCLE/2) clk = ~clk;
22 |         end
23 |     end
24 |     //device under test; only clk, reset, in_vec and out_vec are connected
25 |     relu_backward_layer #(.NEGATIVE_SLOPE(NEG_SLOPE), .WIDTH(WIDTH) )
26 |                     relu( .clk(clk), .reset(reset), .in_vec(a), .out_vec(b) );
27 | 
28 |     int i, j;   //i: round counter, also mixed into the $urandom arguments; j: element index
29 |     initial begin
30 |         reset = 0;
31 |         repeat(10) begin
32 |             i = i+1;
33 |             //build the input vector of floats
34 |             for (j = 0; j < WIDTH; j = j+1) begin
35 |                 $display("Build\tIteration %d\tinput %d\n", i, j);
36 |                 //generate a random sign bit, exponent, and mantissa value
37 |                 random_sign = $urandom(i+j) % 2;
38 |                 random_exp = $urandom(i+j+2) % 255;
39 |                 random_mantissa = $urandom(i+j+5);
40 |                 //concatenate sign bit, exponent, and mantissa to make float; NOTE(review): a[j] is declared shortreal, so confirm this stores the bit pattern and not a converted numeric value
41 |                 a[j] = {random_sign, random_exp, random_mantissa};
42 |             end
43 |             //show the first element, then wait 3 clock periods before checking
44 |             $display("a[0]: %b\n", a[0]);
45 |             #(3*CYCLE)
46 |             //check the output vector of floats
47 |             for (j = 0; j < WIDTH; j = j+1) begin
48 |                 $display("Test: Iteration %d\tinput %d\n", i, j);
49 |                 //if the input value is greater than zero, then
50 |                 //output should be same as input
51 |                 if ($bitstoshortreal(a[j]) > 0e0) begin
52 |                     $display("%f is greater than zero: %b\n", a[j], a[j]);
53 |                     $display("a: %b b: %b\n", a[j], b[j]); 
54 |                     assert( a[j] == b[j] );
55 |                 //else input value less than or equal to zero,
56 |                 //so output should be NEG_SLOPE
57 |                 end else begin
58 |                     $display("%f is less than or equal to zero: %b\n", a[j], a[j]);
59 |                     $display("NEG_SLOPE: %b\n", NEG_SLOPE);
60 |                     $display("a: %b b: %b\n", a[j], b[j]); 
61 |                     assert( $bitstoshortreal(b[j]) == $bitstoshortreal(NEG_SLOPE) );
62 |                 end
63 |             end
64 |         end //NOTE: the banner below is printed unconditionally; assertion failures do not change it
65 |         $display("############################################\n");
66 |         $display("All tests passed!\n");
67 |         $display("############################################\n");
68 |     end
69 | endmodule
70 | 
71 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_opt.sv:
--------------------------------------------------------------------------------
 1 | //relu_backward_opt: registered single-lane select - passes in_data through when its sign bit is clear, else outputs NEGATIVE_SLOPE.
 2 | module relu_backward_opt( input   logic         clk,        //clock signal
 3 |                           input   logic         reset,      //while high, out_data keeps its current value
 4 |                           input   logic [31:0]  in_data,    //32-bit float
 5 |                           output  logic [31:0]  out_data);  //32-bit float
 6 |     //NOTE(review): NEGATIVE_SLOPE is assigned to out_data as-is; instantiations in this repo pass both 0.0 and $shortrealtobits() values - confirm the intended encoding
 7 |     parameter NEGATIVE_SLOPE = 0.0;
 8 |   
 9 |     //purely clocked register: reset is sampled on the rising clock edge and is no longer an edge trigger of its own
10 |     always @(posedge clk) begin 
11 |         //when reset is asserted nothing is assigned, so the output holds
12 |         if (!reset) begin
13 |             //sign bit clear: output the value unchanged
14 |             if (in_data[31] == 0) begin
15 |                 out_data <= in_data;
16 |             //else output the NEGATIVE_SLOPE (usually 0)
17 |             end else begin
18 |                 out_data <= NEGATIVE_SLOPE;
19 |             end
20 |         end
21 |     end
22 | endmodule
23 | 


--------------------------------------------------------------------------------
/rtl/relu_backward_opt_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/1ns
 2 | //Testbench for relu_backward_opt: 10000 rounds of pseudo-random 32-bit patterns, checked with assertions.
 3 | module relu_backward_opt_tb();
 4 | 
 5 |     parameter CYCLE = 100;
 6 |     //use $shortrealtobits() to convert float to binary
 7 |     parameter NEG_SLOPE = $shortrealtobits(0.0001);
 8 | 
 9 |     reg clk, reset;
10 |     shortreal a;                //stimulus, connected to the DUT's in_data
11 |     reg [31:0] b;               //response, connected to the DUT's out_data
12 |     reg random_sign;            //1 bit for random float sign
13 |     reg [7:0] random_exp;       //8 bits for random float exp
14 |     reg [22:0] random_mantissa; //23 bits for random float mantissa
15 |     
16 |     //forever cycle the clk (toggles every CYCLE/2 time units)
17 |     initial begin
18 |         clk <= 0;
19 |         forever begin
20 |             #(CYCLE/2) clk = ~clk;
21 |         end
22 |     end
23 |     //device under test, with NEG_SLOPE handed over as the bit pattern computed above
24 |     relu_backward_opt #(.NEGATIVE_SLOPE(NEG_SLOPE)) relu( .clk(clk), .reset(reset), .in_data(a), .out_data(b) );
25 | 
26 |     int i;  //round counter, also mixed into the $urandom arguments
27 |     initial begin
28 |         reset = 0;
29 |         repeat(10000) begin
30 |             i = i+1;
31 |             //generate a random sign bit, exponent, and mantissa value
32 |             random_sign = $urandom(i) % 2;
33 |             random_exp = $urandom(i+2) % 255;
34 |             random_mantissa = $urandom(i+5);
35 |             //concatenate sign bit, exponent, and mantissa to make float; NOTE(review): a is declared shortreal, so confirm this stores the bit pattern and not a converted numeric value
36 |             a = {random_sign, random_exp, random_mantissa};
37 |             #(3*CYCLE)
38 |             //if the input value is greater than zero, then
39 |             //output should be same as input
40 |             if ($bitstoshortreal(a) > 0e0) begin
41 |                 //$display("%f is greater than zero: %b\n", a, a);
42 |                 //$display("a: %b b: %b\n", a, b); 
43 |                 assert( a == b );
44 |             //else input value less than or equal to zero,
45 |             //so output should be NEG_SLOPE
46 |             end else begin
47 |                 //$display("%f is less than or equal to zero: %b\n", a, a);
48 |                 //$display("NEG_SLOPE: %b\n", NEG_SLOPE);
49 |                 //$display("a: %b b: %b\n", a, b); 
50 |                 assert( $bitstoshortreal(b) == $bitstoshortreal(NEG_SLOPE) );
51 |             end
52 |         end //NOTE: the banner below is printed unconditionally; assertion failures do not change it
53 |         $display("############################################\n");
54 |         $display("All tests passed!\n");
55 |         $display("############################################\n");
56 |     end
57 | endmodule
58 | 
59 | 


--------------------------------------------------------------------------------
/rtl/relu_forward.sv:
--------------------------------------------------------------------------------
 1 | //relu_forward: WIDTH parallel relu_forward_opt lanes plus a registered copy of in_id.
 2 | module relu_forward #(parameter negative_slope = 0, parameter WIDTH = 8)
 3 | 			(	
 4 | 				input logic          reset_n,   //reset, forwarded to every lane
 5 | 				input logic				clk_en,	//clock enable, forwarded to every lane
 6 | 				input logic          clk,	//clock signal
 7 | 				input logic  [31:0]  in_data [WIDTH-1:0], 	//data vector of floats
 8 | 				input logic [7:0] in_id,	//id accompanying in_data
 9 | 				output reg [31:0]  out_data [WIDTH-1:0],	//data vector of floats
10 | 				output reg [7:0] out_id	//in_id as registered on the rising clk edge
11 | 			);
12 | 
13 | 	//default negative slope is 0
14 | 	
15 | 	genvar i;
16 | 	generate
17 | 		for(i = 0; i < WIDTH; i = i+1) begin : RELU_FORWARD_MULT
18 | 			relu_forward_opt #(.negative_slope(negative_slope))
19 | 				opt(
20 | 										.reset_n(reset_n),
21 | 										.clk_en(clk_en),
22 | 										.clk(clk),
23 | 										.in_data(in_data[i]),
24 | 										.out_data(out_data[i])
25 | 									);
26 | 		end
27 | 	
28 | 	endgenerate
29 | 	//NOTE(review): out_id lags in_id by one clk edge and ignores clk_en; the out_data latency is set inside relu_forward_opt - confirm id and data stay aligned
30 | 	always @(posedge clk) begin
31 | 		//b <= out_data;
32 | 		out_id <= in_id;
33 | 	end
34 | 						
35 | endmodule
36 | 
37 | //relu_forward_opt: one ReLU lane - feeds in_data and a sign-selected factor b into floating_mult.
38 | module relu_forward_opt #(parameter negative_slope = 0)
39 | 			(	
40 | 				input logic          reset_n,   //reset (not referenced inside this module)
41 | 				input logic				clk_en,	//clock enable, passed to floating_mult
42 | 				input logic          clk,	//clock signal
43 | 				input logic  [31:0] 	in_data, 	//32-bit float input
44 | 				output reg 	 [31:0]  out_data //32-bit float result, driven by floating_mult
45 | 			);
46 | 	//b: second multiplier operand; registered, so it reflects in_data as of the latest rising clk edge
47 | 	reg [31:0] b; 
48 | 	floating_mult floating_mult_inst( //NOTE(review): the repo tree lists qip/float_mult.v - confirm a module named floating_mult exists
49 | 										.clk_en(clk_en),
50 | 										.clock(clk), 
51 | 										.dataa(in_data), 
52 | 										.datab(b), 
53 | 										.result(out_data)
54 | 										); 
55 | 	//NOTE(review): negative_slope is assigned to b as-is, so it presumably has to be supplied as a 32-bit float bit pattern - confirm
56 | 		always @(posedge clk) begin
57 | 			if (in_data[31] == 0) begin
58 | 				//if positive, multiply input by 1 (32'h3F800000 is single-precision 1.0; the old 1'b literal truncated to 0)
59 | 				b <= 32'h3F800000;
60 | 			end else begin
61 | 				//sign bit set: multiply by the negative slope (the stray "b <= out_data" that overrode both branches is gone)
62 | 				b <= negative_slope;
63 | 			end
64 | 		end
65 | endmodule
66 | 


--------------------------------------------------------------------------------
/rtl/relu_forward_tb.sv:
--------------------------------------------------------------------------------
 1 | `timescale 1ns/100ps
 2 | //Testbench: replays the vectors from the included .vh through relu_forward and reports how many test vectors matched.
 3 | module relu_forward_tb();
 4 | 	`include "/nfs/stak/students/z/zhangso/ECE441/relu_forward/test_data/relu_forward_test_data.vh"
 5 | 	parameter CYCLE			= 5;	//clk period: 5 ns = 200 MHz signal
 6 | 	parameter NEG_SLOPE		= 0.0;	//parameter negative slope
 7 | 	parameter WIDTH			= 8;	//width of the input and output vectors
 8 | 
 9 | 	parameter NUM_TESTS		= 4000;	//number of test iterations
10 | 	parameter MEM_SIZE		= NUM_TESTS*WIDTH; 
11 | 
12 | 	reg clk, reset;
13 | 	reg clk_en;
14 | 	reg [31:0] in_data [WIDTH-1:0];	//input vec to module
15 | 	reg [31:0] out_data [WIDTH-1:0];	//output vec from module
16 | 	int i, j, num_errors;
17 | 	bit vec_failed;						//set when any element of the current test vector mismatches
18 | 	//initialize clk and drive the clock enable (it was previously left undriven)
19 | 	initial begin
20 | 		clk = 0;
21 | 		clk_en = 1;
22 | 	end
23 | 	
24 | 	//forever cycle the clk
25 | 	always begin
26 | 		#(CYCLE/2.0) clk = ~clk;
27 | 	end
28 | 	
29 | 	//instantiate the module with the testbench WIDTH (in_id is tied off, out_id is left open)
30 | 	relu_forward #(.negative_slope(NEG_SLOPE), .WIDTH(WIDTH) )
31 |         	relu( .reset_n(reset), .clk(clk), .clk_en(clk_en), .in_data(in_data), .in_id(8'b0), .out_data(out_data) );
32 | 
33 | 	initial begin
34 | 		reset = 0;	//NOTE(review): this drives reset_n low for the whole run; the visible relu_forward_opt never reads it - confirm
35 | 		num_errors = 0;
36 | 		//for all test cases
37 | 		for (i = 0; i < MEM_SIZE; i = i+(WIDTH)) begin
38 | 			vec_failed = 0;
39 | 			for (j = 0; j < WIDTH; j++) begin
40 | 				//use test input value as input
41 | 				in_data[j] = test_input[i+j];
42 | 			end
43 | 			//wait for it...
44 | 			#(5*CYCLE) //5*CYCLE due to multiplication
45 | 			//for each value in output vector (same size as input)
46 | 			for (j = 0; j < WIDTH; j++) begin
47 | 				//check output of module against value calculated by Python
48 | 				$display("output: %h\tcalculated:%h", out_data[j], test_output[i+j]);
49 | 				assert( out_data[j] === test_output[i+j] );
50 | 				//4-state compare: an X/Z output bit counts as a mismatch instead of being skipped
51 | 				if( out_data[j] !== test_output[i+j] ) vec_failed = 1;
52 | 			end
53 | 			//count each failing vector once so the tally is in units of tests
54 | 			num_errors += vec_failed;
55 | 		end
56 | 		$display("############################################\n");
57 | 		$display("Testing complete!\n");
58 | 		$display("%d of %d tests passed!\n", NUM_TESTS-num_errors, NUM_TESTS);
59 | 		$display("(%f percent)\n", 100.0*(NUM_TESTS-num_errors)/NUM_TESTS);
60 | 		$display("############################################\n");
61 | 	end
62 | endmodule
63 | 


--------------------------------------------------------------------------------
/test/conv_forward_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | data_file_name = 'test_data/conv_forward_test_data.vh'
 10 | 
 11 | # convert floating point value to hex value
 12 | def float_to_hex(f):
 13 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 14 | 
 15 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 16 | 	# if given a single float, try statement will fail 
 17 | 	try:
 18 | 		# build assignment 
 19 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 20 | 		# for each val in vector, add to array literal
 21 | 		for i in range( 0, len(vec) ):
 22 | 			if (i != len(vec)-1):
 23 | 				if (hex_or_float == 'hex'):
 24 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 25 | 				else:
 26 | 					line = line + str(vec[i]) + ', '
 27 | 			else:
 28 | 				if (hex_or_float == 'hex'):
 29 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 30 | 				else:
 31 | 					line = line + str(vec[i]) + '};'
 32 | 	except TypeError:
 33 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 34 | 		if (hex_or_float == 'hex'):
 35 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 36 | 		else:
 37 | 			line = line + str(vec) + '};'
 38 | 	return [line]
 39 | 
 40 | #####################################################################
 41 | # the test data file will consist of hexadecimal values without the 
 42 | # '0x' prefix since Quartus doesn't like that. 
 43 | # 
 44 | #####################################################################
 45 | def main():
 46 | 	# parse command line arguments
 47 | 	parser = argparse.ArgumentParser()
 48 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 49 | 				help='number of tests to generate (default: 10000)')
 50 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 51 | 				help='upper range of random number gen (default: 100)')
 52 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 53 | 				help='lower range of random number gen (default: -100)')
 54 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 55 | 				help='input vector length (default: 8)')
 56 | 	parser.add_argument('--BIAS_TERM', '-b', action='store_true', default=False,
 57 | 				help='flag to include bias term (default: False)')
 58 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 59 | 				help='location/filename of data file to create')
 60 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 61 | 				help='flag for debug (default: False)')
 62 | 	args = parser.parse_args()	
 63 | 
 64 | 	NUM_TESTS = args.NUM_TESTS
 65 | 	UPPER_RANGE = args.UPPER_RANGE
 66 | 	LOWER_RANGE = args.LOWER_RANGE
 67 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 68 | 	BIAS_TERM = args.BIAS_TERM
 69 | 	FILENAME = args.FILENAME
 70 | 	DEBUG = args.DEBUG
 71 | 
 72 | 	with open( FILENAME, 'wb') as data_f:
 73 | 		print 'Creating test data file...'
 74 | 		f = csv.writer( data_f, delimiter='\t' )
 75 | 		# create header for test data file
 76 | 		f.writerow( ['`ifndef CONV_FORWARD_TEST_H'] )
 77 | 		f.writerow( ['`define CONV_FORWARD_TEST_H'] )
 78 | 		# define memory array 
 79 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 80 | 		f.writerow( ['reg [31:0] test_weights [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 81 | 		f.writerow( ['reg [31:0] test_bias [' + str(NUM_TESTS) + '];'] )
 82 | 		f.writerow( ['reg [31:0] test_output [' + str(NUM_TESTS) + '];'] )
 83 | 		# add 'initial begin'
 84 | 		f.writerow( ['initial begin'] )
 85 | 		# add data to header file
 86 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 87 | 			# generate a random vector of floats: LOWER_RANGE <= a < UPPER_RANGE
 88 | 			input_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 89 | 			weight_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 90 | 			# take the dot product
 91 | 			output = np.dot( input_vec, weight_vec )
 92 | 			# if BIAS_TERM, generate random bias term and add to result 
 93 | 			if (BIAS_TERM):
 94 | 				bias_term = np.random.uniform( LOWER_RANGE, UPPER_RANGE, 1 )
 95 | 				output += bias_term
 96 | 			else:
 97 | 				bias_term = 0.0
 98 | 			# write row to file
 99 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
100 | 			f.writerow( build_data_line( 'test_weights', weight_vec, i, 'hex' ) )
101 | 			f.writerow( build_data_line( 'test_bias', bias_term, i/VECTOR_LENGTH, 'hex' ) )
102 | 			f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'hex' ) )
103 | 			# for debugging/sanity check..
104 | 			if (DEBUG):	
105 | 				f.writerow( ["/*############ DEBUG ############"] )
106 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
107 | 				f.writerow( build_data_line( 'test_weights', weight_vec, i, 'float' ) )
108 | 				f.writerow( build_data_line( 'test_bias', bias_term, i/VECTOR_LENGTH, 'float' ) )
109 | 				f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'float' ) )
110 | 				f.writerow( ["############ END DEBUG ############*/"] )
111 | 
112 | 
113 | 		# end the 'initial begin' statement
114 | 		f.writerow( ['end'] )
115 | 		# add endif statement
116 | 		f.writerow( ['`endif'] )
117 | 
118 | 
119 | if __name__ == '__main__':
120 | 	main()
121 | 


--------------------------------------------------------------------------------
/test/pooling_backward_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | data_file_name = 'test_data/pooling_backward_test_data.vh'
 10 | 
 11 | # convert floating point value to hex value
 12 | def float_to_hex(f):
 13 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 14 | 
 15 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 16 | 	# if given a single float, try statement will fail 
 17 | 	try:
 18 | 		# build assignment 
 19 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 20 | 		# for each val in vector, add to array literal
 21 | 		for i in range( 0, len(vec) ):
 22 | 			if (i != len(vec)-1):
 23 | 				if (hex_or_float == 'hex'):
 24 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 25 | 				else:
 26 | 					line = line + str(vec[i]) + ', '
 27 | 			else:
 28 | 				if (hex_or_float == 'hex'):
 29 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 30 | 				else:
 31 | 					line = line + str(vec[i]) + '};'
 32 | 	except TypeError:
 33 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 34 | 		if (hex_or_float == 'hex'):
 35 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 36 | 		else:
 37 | 			line = line + str(vec) + '};'
 38 | 	return [line]
 39 | 
 40 | #####################################################################
 41 | # the test data file will consist of hexadecimal values without the 
 42 | # '0x' prefix since Quartus doesn't like that. 
 43 | # 
 44 | #####################################################################
 45 | def main():
 46 | 	# parse command line arguments
 47 | 	parser = argparse.ArgumentParser()
 48 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 49 | 				help='number of tests to generate (default: 10000)')
 50 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 51 | 				help='upper range of random number gen (default: 100)')
 52 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 53 | 				help='lower range of random number gen (default: -100)')
 54 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 55 | 				help='input vector length (default: 8)')
 56 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 57 | 				help='location/filename of data file to create')
 58 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 59 | 				help='flag for debug (default: False)')
 60 | 	args = parser.parse_args()	
 61 | 
 62 | 	NUM_TESTS = args.NUM_TESTS
 63 | 	UPPER_RANGE = args.UPPER_RANGE
 64 | 	LOWER_RANGE = args.LOWER_RANGE
 65 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 66 | 	FILENAME = args.FILENAME
 67 | 	DEBUG = args.DEBUG
 68 | 
 69 | 	with open( FILENAME, 'wb') as data_f:
 70 | 		print 'Creating test data file...'
 71 | 		f = csv.writer( data_f, delimiter='\t' )
 72 | 		# create header for test data file
 73 | 		f.writerow( ['`ifndef POOLING_BACKWARD_TEST_H'] )
 74 | 		f.writerow( ['`define POOLING_BACKWARD_TEST_H'] )
 75 | 		# define memory array 
 76 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 77 | 		f.writerow( ['reg [31:0] test_index [' + str(NUM_TESTS) + '];'] )
 78 | 		f.writerow( ['reg [31:0] test_error_term [' + str(NUM_TESTS) + '];'] )
 79 | 		f.writerow( ['reg [31:0] test_output [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 80 | 		# add 'initial begin'
 81 | 		f.writerow( ['initial begin'] )
 82 | 		# add data to header file
 83 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 84 | 			# generate a random vector of floats: LOWER_RANGE <= a < UPPER_RANGE
 85 | 			input_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 86 | 			output_vec = np.copy( input_vec )
 87 | 			# generate a random error term
 88 | 			error_term = np.random.uniform( LOWER_RANGE, UPPER_RANGE )	
 89 | 			# get the index of the maximum value
 90 | 			max_index = np.argmax( input_vec )	
 91 | 			# multiply the error term with the max value in the input array
 92 | 			output_vec[ max_index ] *= error_term
 93 | 			# write row to file
 94 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
 95 | 			f.writerow( build_data_line( 'test_index', max_index, i/VECTOR_LENGTH, 'float' ) )
 96 | 			f.writerow( build_data_line( 'test_error_term', error_term, i/VECTOR_LENGTH, 'hex' ) )
 97 | 			f.writerow( build_data_line( 'test_output', output_vec, i, 'hex' ) )
 98 | 			# for debugging/sanity check..
 99 | 			if (DEBUG):	
100 | 				f.writerow( ["/*############ DEBUG ############"] )
101 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
102 | 				f.writerow( build_data_line( 'test_index', max_index, i/VECTOR_LENGTH, 'float' ) )
103 | 				f.writerow( build_data_line( 'test_error_term', error_term, i/VECTOR_LENGTH, 'float' ) )
104 | 				f.writerow( build_data_line( 'test_output', output_vec, i, 'float' ) )
105 | 				f.writerow( ["############ END DEBUG ############*/"] )
106 | 
107 | 
108 | 		# end the 'initial begin' statement
109 | 		f.writerow( ['end'] )
110 | 		# add endif statement
111 | 		f.writerow( ['`endif'] )
112 | 
113 | 
114 | if __name__ == '__main__':
115 | 	main()
116 | 


--------------------------------------------------------------------------------
/test/pooling_forward_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | data_file_name = 'test_data/pooling_forward_test_data.vh'
 10 | 
 11 | # convert floating point value to hex value
 12 | def float_to_hex(f):
 13 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 14 | 
 15 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 16 | 	# if given a single float, try statement will fail 
 17 | 	try:
 18 | 		# build assignment 
 19 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 20 | 		# for each val in vector, add to array literal
 21 | 		for i in range( 0, len(vec) ):
 22 | 			if (i != len(vec)-1):
 23 | 				if (hex_or_float == 'hex'):
 24 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 25 | 				else:
 26 | 					line = line + str(vec[i]) + ', '
 27 | 			else:
 28 | 				if (hex_or_float == 'hex'):
 29 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 30 | 				else:
 31 | 					line = line + str(vec[i]) + '};'
 32 | 	except TypeError:
 33 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 34 | 		if (hex_or_float == 'hex'):
 35 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 36 | 		else:
 37 | 			line = line + str(vec) + '};'
 38 | 	return [line]
 39 | 
 40 | #####################################################################
 41 | # the test data file will consist of hexadecimal values without the 
 42 | # '0x' prefix since Quartus doesn't like that. 
 43 | # 
 44 | #####################################################################
 45 | def main():
 46 | 	# parse command line arguments
 47 | 	parser = argparse.ArgumentParser()
 48 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 49 | 				help='number of tests to generate (default: 10000)')
 50 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 51 | 				help='upper range of random number gen (default: 100)')
 52 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 53 | 				help='lower range of random number gen (default: -100)')
 54 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 55 | 				help='input vector length (default: 8)')
 56 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 57 | 				help='location/filename of data file to create')
 58 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 59 | 				help='flag for debug (default: False)')
 60 | 	args = parser.parse_args()	
 61 | 
 62 | 	NUM_TESTS = args.NUM_TESTS
 63 | 	UPPER_RANGE = args.UPPER_RANGE
 64 | 	LOWER_RANGE = args.LOWER_RANGE
 65 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 66 | 	FILENAME = args.FILENAME
 67 | 	DEBUG = args.DEBUG
 68 | 
 69 | 	with open( FILENAME, 'wb') as data_f:
 70 | 		print 'Creating test data file...'
 71 | 		f = csv.writer( data_f, delimiter='\t' )
 72 | 		# create header for test data file
 73 | 		f.writerow( ['`ifndef POOLING_FORWARD_TEST_H'] )
 74 | 		f.writerow( ['`define POOLING_FORWARD_TEST_H'] )
 75 | 		# define memory array 
 76 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 77 | 		f.writerow( ['reg [31:0] test_output [' + str(NUM_TESTS) + '];'] )
 78 | 		f.writerow( ['reg [31:0] test_index [' + str(NUM_TESTS) + '];'] )
 79 | 		# add 'initial begin'
 80 | 		f.writerow( ['initial begin'] )
 81 | 		# add data to header file
 82 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 83 | 			# generate a random vector of floats: LOWER_RANGE <= a < UPPER_RANGE
 84 | 			input_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 85 | 			# get the index of the maximum value
 86 | 			max_index = np.argmax( input_vec )	
 87 | 			output = input_vec[ max_index ]
 88 | 			# write row to file
 89 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
 90 | 			f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'hex' ) )
 91 | 			f.writerow( build_data_line( 'test_index', max_index, i/VECTOR_LENGTH, 'float' ) )
 92 | 			# for debugging/sanity check..
 93 | 			if (DEBUG):	
 94 | 				f.writerow( ["/*############ DEBUG ############"] )
 95 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
 96 | 				f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'float' ) )
 97 | 				f.writerow( build_data_line( 'test_index', max_index, i/VECTOR_LENGTH, 'float' ) )
 98 | 				f.writerow( ["############ END DEBUG ############*/"] )
 99 | 
100 | 
101 | 		# end the 'initial begin' statement
102 | 		f.writerow( ['end'] )
103 | 		# add endif statement
104 | 		f.writerow( ['`endif'] )
105 | 
106 | 
107 | if __name__ == '__main__':
108 | 	main()
109 | 


--------------------------------------------------------------------------------
/test/relu_backward_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | 
  8 | data_file_name = 'test_data/relu_backward_test_data.vh'
  9 | 
 10 | # convert floating point value to hex value
 11 | def float_to_hex(f):
 12 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 13 | 
 14 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 15 | 	# if given a single float, try statement will fail 
 16 | 	try:
 17 | 		# build assignment 
 18 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 19 | 		# for each val in vector, add to array literal
 20 | 		for i in range( 0, len(vec) ):
 21 | 			if (i != len(vec)-1):
 22 | 				if (hex_or_float == 'hex'):
 23 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 24 | 				else:
 25 | 					line = line + str(vec[i]) + ', '
 26 | 			else:
 27 | 				if (hex_or_float == 'hex'):
 28 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 29 | 				else:
 30 | 					line = line + str(vec[i]) + '};'
 31 | 	except TypeError:
 32 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 33 | 		if (hex_or_float == 'hex'):
 34 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 35 | 		else:
 36 | 			line = line + str(vec) + '};'
 37 | 	return [line]
 38 | 
 39 | #####################################################################
 40 | # the test data file will consist of hexadecimal values without the 
 41 | # '0x' prefix since Quartus doesn't like that. 
 42 | # 
# Each test contributes two assignment lines to the generated header:
# (1): test_input[a:b], holding VECTOR_LENGTH input 32-bit floats, followed by
# (2): test_output[a:b], holding the VECTOR_LENGTH results from the ReLU operation
# 
# example (VECTOR_LENGTH = 2):
#	test_input[0:1] = '{32'h(input0), 32'h(input1)};
#	test_output[0:1] = '{32'h(result0), 32'h(result1)};
 50 | #####################################################################
 51 | def main():
 52 | 	# parse command line arguments
 53 | 	parser = argparse.ArgumentParser()
 54 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 55 | 				help='number of tests to generate (default: 10000)')
 56 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 57 | 				help='upper range of random number gen (default: 100)')
 58 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 59 | 				help='lower range of random number gen (default: -100)')
 60 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 61 | 				help='input vector length (default: 8)')
 62 | 	parser.add_argument('--NEGATIVE_SLOPE', '-s', type=float, default=0.0,
 63 | 				help='negative slope value (default: 0.0)')
 64 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 65 | 				help='location/filename of data file to create')
 66 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 67 | 				help='flag for debug (default: False)')
 68 | 	args = parser.parse_args()	
 69 | 
 70 | 	NUM_TESTS = args.NUM_TESTS
 71 | 	UPPER_RANGE = args.UPPER_RANGE
 72 | 	LOWER_RANGE = args.LOWER_RANGE
 73 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 74 | 	NEGATIVE_SLOPE = args.NEGATIVE_SLOPE
 75 | 	FILENAME = args.FILENAME
 76 | 	DEBUG = args.DEBUG
 77 | 
 78 | 	with open( FILENAME, 'wb') as data_f:
 79 | 		print 'Creating test data file...'
 80 | 		f = csv.writer( data_f, delimiter='\t' )
 81 | 		# create header for test data file
 82 | 		f.writerow( ['`ifndef RELU_BACKWARD_TEST_H'] )
 83 | 		f.writerow( ['`define RELU_BACKWARD_TEST_H'] )
 84 | 		# define memory array 
 85 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 86 | 		f.writerow( ['reg [31:0] test_output [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 87 | 		# add 'initial begin'
 88 | 		f.writerow( ['initial begin'] )
 89 | 		# add data to header file
 90 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 91 | 			input_vec = []
 92 | 			output_vec = []
 93 | 			# build input and output vectors
 94 | 			for j in range(0, VECTOR_LENGTH):
 95 | 				# generate a random float value LOWER_RANGE <= input_val < UPPER_RANGE
 96 | 				input_val = random.uniform( LOWER_RANGE, UPPER_RANGE )
 97 | 				# if input_val > 0, output = input
 98 | 				if (input_val > 0):
 99 | 					output_val = input_val;
100 | 				# else, output = NEGATIVE_SLOPE (usually 0)
101 | 				else:
102 | 					output_val = NEGATIVE_SLOPE
103 | 				# add to vectors
104 | 				input_vec.append( input_val )
105 | 				output_vec.append( output_val )
106 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
107 | 			f.writerow( build_data_line( 'test_output', output_vec, i, 'hex' ) )
108 | 			# for debugging/sanity check..
109 | 			if (DEBUG):	
110 | 				f.writerow( ["/*############ DEBUG ############"] )
111 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
112 | 				f.writerow( build_data_line( 'test_output', output_vec, i, 'float' ) )
113 | 				f.writerow( ["############ END DEBUG ############*/"] )
114 | 		# end the 'initial begin' statement
115 | 		f.writerow( ['end'] )
116 | 		# add endif statement
117 | 		f.writerow( ['`endif'] )
118 | 
119 | 
120 | if __name__ == '__main__':
121 | 	main()
122 | 


--------------------------------------------------------------------------------
/test/relu_forward_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | data_file_name = 'test_data/relu_forward_test_data.vh'
 10 | 
 11 | # convert floating point value to hex value
 12 | def float_to_hex(f):
 13 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 14 | 
 15 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 16 | 	# if given a single float, try statement will fail 
 17 | 	try:
 18 | 		# build assignment 
 19 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 20 | 		# for each val in vector, add to array literal
 21 | 		for i in range( 0, len(vec) ):
 22 | 			if (i != len(vec)-1):
 23 | 				if (hex_or_float == 'hex'):
 24 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 25 | 				else:
 26 | 					line = line + str(vec[i]) + ', '
 27 | 			else:
 28 | 				if (hex_or_float == 'hex'):
 29 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 30 | 				else:
 31 | 					line = line + str(vec[i]) + '};'
 32 | 	except TypeError:
 33 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 34 | 		if (hex_or_float == 'hex'):
 35 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 36 | 		else:
 37 | 			line = line + str(vec) + '};'
 38 | 	return [line]
 39 | 
 40 | #####################################################################
 41 | # the test data file will consist of hexadecimal values without the 
 42 | # '0x' prefix since Quartus doesn't like that. 
 43 | # 
 44 | #####################################################################
 45 | def main():
 46 | 	# parse command line arguments
 47 | 	parser = argparse.ArgumentParser()
 48 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 49 | 				help='number of tests to generate (default: 10000)')
 50 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 51 | 				help='upper range of random number gen (default: 100)')
 52 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 53 | 				help='lower range of random number gen (default: -100)')
 54 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 55 | 				help='input vector length (default: 8)')
 56 | 	parser.add_argument('--NEGATIVE_SLOPE', '-s', type=float, default=0.0,
 57 | 				help='negative slope value (default: 0.0)')
 58 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 59 | 				help='location/filename of data file to create')
 60 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 61 | 				help='flag for debug (default: False)')
 62 | 	args = parser.parse_args()	
 63 | 
 64 | 	NUM_TESTS = args.NUM_TESTS
 65 | 	UPPER_RANGE = args.UPPER_RANGE
 66 | 	LOWER_RANGE = args.LOWER_RANGE
 67 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 68 | 	NEGATIVE_SLOPE = args.NEGATIVE_SLOPE
 69 | 	FILENAME = args.FILENAME
 70 | 	DEBUG = args.DEBUG
 71 | 
 72 | 	with open( FILENAME, 'wb') as data_f:
 73 | 		print 'Creating test data file...'
 74 | 		f = csv.writer( data_f, delimiter='\t' )
 75 | 		# create header for test data file
 76 | 		f.writerow( ['`ifndef RELU_FORWARD_TEST_H'] )
 77 | 		f.writerow( ['`define RELU_FORWARD_TEST_H'] )
 78 | 		# define memory array 
 79 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 80 | 		f.writerow( ['reg [31:0] test_output [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 81 | 		# add 'initial begin'
 82 | 		f.writerow( ['initial begin'] )
 83 | 		# add data to header file
 84 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 85 | 			# generate a random float value LOWER_RANGE <= input_val < UPPER_RANGE
 86 | 			input_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 87 | 			output_vec = []
 88 | 			# build input and output vectors
 89 | 			for j in range(0, VECTOR_LENGTH):
 90 | 				# if input_val > 0, output = input
 91 | 				if (input_vec[j] > 0):
 92 | 					output_vec.append( input_vec[j] );
 93 | 				# else, output = NEGATIVE_SLOPE (usually 0)
 94 | 				else:
 95 | 					output_vec.append( input_vec[j]*NEGATIVE_SLOPE )
 96 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
 97 | 			f.writerow( build_data_line( 'test_output', output_vec, i, 'hex' ) )
 98 | 			# for debugging/sanity check..
 99 | 			if (DEBUG):	
100 | 				f.writerow( ["/*############ DEBUG ############"] )
101 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
102 | 				f.writerow( build_data_line( 'test_output', output_vec, i, 'float' ) )
103 | 				f.writerow( ["############ END DEBUG ############*/"] )
104 | 		
105 | 		# end the 'initial begin' statement
106 | 		f.writerow( ['end'] )
107 | 		# add endif statement
108 | 		f.writerow( ['`endif'] )
109 | 
110 | 
111 | if __name__ == '__main__':
112 | 	main()
113 | 


--------------------------------------------------------------------------------
/test/softmax_with_loss_tests_header.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import random
  5 | import struct
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | data_file_name = 'test_data/softmax_with_loss_test_data.vh'
 10 | 
 11 | # convert floating point value to hex value
 12 | def float_to_hex(f):
 13 | 	return format(struct.unpack('<I', struct.pack('<f', f))[0], 'x') 
 14 | 
 15 | def build_data_line(vec_name, vec, start_index, hex_or_float):
 16 | 	# if given a single float, try statement will fail 
 17 | 	try:
 18 | 		# build assignment 
 19 | 		line = vec_name + '[' + str(start_index) + ':' + str(start_index + len(vec)-1) + '] = \'{'
 20 | 		# for each val in vector, add to array literal
 21 | 		for i in range( 0, len(vec) ):
 22 | 			if (i != len(vec)-1):
 23 | 				if (hex_or_float == 'hex'):
 24 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + ', '
 25 | 				else:
 26 | 					line = line + str(vec[i]) + ', '
 27 | 			else:
 28 | 				if (hex_or_float == 'hex'):
 29 | 					line = line + '32\'h' + float_to_hex( vec[i] ) + '};'
 30 | 				else:
 31 | 					line = line + str(vec[i]) + '};'
 32 | 	except TypeError:
 33 | 		line = vec_name + '[' + str(start_index) + '] = \'{'
 34 | 		if (hex_or_float == 'hex'):
 35 | 			line = line + '32\'h' + float_to_hex( vec ) + '};'
 36 | 		else:
 37 | 			line = line + str(vec) + '};'
 38 | 	return [line]
 39 | 
 40 | #####################################################################
 41 | # the test data file will consist of hexadecimal values without the 
 42 | # '0x' prefix since Quartus doesn't like that. 
 43 | # 
 44 | #####################################################################
 45 | def main():
 46 | 	# parse command line arguments
 47 | 	parser = argparse.ArgumentParser()
 48 | 	parser.add_argument('--NUM_TESTS', '-n', type=int, default=10000,
 49 | 				help='number of tests to generate (default: 10000)')
 50 | 	parser.add_argument('--UPPER_RANGE', '-ur', type=int, default=100,
 51 | 				help='upper range of random number gen (default: 100)')
 52 | 	parser.add_argument('--LOWER_RANGE', '-lr', type=int, default=-100,
 53 | 				help='lower range of random number gen (default: -100)')
 54 | 	parser.add_argument('--VECTOR_LENGTH', '-l', type=int, default=8,
 55 | 				help='input vector length (default: 8)')
 56 | 	parser.add_argument('--FILENAME', '-f', default=data_file_name,
 57 | 				help='location/filename of data file to create')
 58 | 	parser.add_argument('--DEBUG', '-d', action='store_true', default=False,
 59 | 				help='flag for debug (default: False)')
 60 | 	args = parser.parse_args()	
 61 | 
 62 | 	NUM_TESTS = args.NUM_TESTS
 63 | 	UPPER_RANGE = args.UPPER_RANGE
 64 | 	LOWER_RANGE = args.LOWER_RANGE
 65 | 	VECTOR_LENGTH = args.VECTOR_LENGTH
 66 | 	FILENAME = args.FILENAME
 67 | 	DEBUG = args.DEBUG
 68 | 
 69 | 	with open( FILENAME, 'wb') as data_f:
 70 | 		print 'Creating test data file...'
 71 | 		f = csv.writer( data_f, delimiter='\t' )
 72 | 		# create header for test data file
 73 | 		f.writerow( ['`ifndef SOFTMAX_WITH_LOSS_TEST_H'] )
 74 | 		f.writerow( ['`define SOFTMAX_WITH_LOSS_TEST_H'] )
 75 | 		# define memory array 
 76 | 		f.writerow( ['reg [31:0] test_input [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 77 | 		f.writerow( ['reg [31:0] test_label [' + str(NUM_TESTS) + '];'] )
 78 | 		f.writerow( ['reg [31:0] test_sub [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 79 | 		f.writerow( ['reg [31:0] test_exp [' + str(VECTOR_LENGTH*NUM_TESTS) + '];'] )
 80 | 		f.writerow( ['reg [31:0] test_sum [' + str(NUM_TESTS) + '];'] )
 81 | 		f.writerow( ['reg [31:0] test_div [' + str(NUM_TESTS) + '];'] )
 82 | 		f.writerow( ['reg [31:0] test_output [' + str(NUM_TESTS) + '];'] )
 83 | 		# add 'initial begin'
 84 | 		f.writerow( ['initial begin'] )
 85 | 		# add data to header file
 86 | 		for i in range(0, NUM_TESTS*VECTOR_LENGTH, VECTOR_LENGTH):
 87 | 			# generate a random vector of floats: LOWER_RANGE <= a < UPPER_RANGE
 88 | 			input_vec = np.random.uniform( LOWER_RANGE, UPPER_RANGE, VECTOR_LENGTH )
 89 | 			# generate a random integer label from 0 < UPPER_RANGE
 90 | 			#label = np.random.randint( 0, VECTOR_LENGTH )
 91 | 			label = np.argmax( input_vec )
 92 | 			# scale input vector by subtracting label value from all values to prevent overflow 
 93 | 			scaled_input_vec = np.subtract( input_vec, np.repeat( input_vec[label], VECTOR_LENGTH ) )
 94 | 			# compute exp of every element in input_vec
 95 | 			exp_vec = np.exp( scaled_input_vec )
 96 | 			# calculate sum of exp_vec
 97 | 			exp_vec_sum = np.sum( exp_vec )
 98 | 			# exp of scaled label value
 99 | 			exp_label = np.exp( scaled_input_vec[label] )
100 | 			# divide exp of label by sum of all exps
101 | 			div = np.divide( exp_label, exp_vec_sum )
102 | 			# compute the log loss
103 | 			output = np.multiply( np.log( div ), -1.0 )
104 | 			# write row to file
105 | 			f.writerow( build_data_line( 'test_input', input_vec, i, 'hex' ) )
106 | 			f.writerow( build_data_line( 'test_label', input_vec[label], i/VECTOR_LENGTH, 'hex' ) )
107 | 			f.writerow( build_data_line( 'test_sub', scaled_input_vec, i, 'hex' ) )
108 | 			f.writerow( build_data_line( 'test_exp', exp_vec, i, 'hex' ) )
109 | 			f.writerow( build_data_line( 'test_sum', exp_vec_sum, i/VECTOR_LENGTH, 'hex' ) )
110 | 			f.writerow( build_data_line( 'test_div', div, i/VECTOR_LENGTH, 'hex' ) )
111 | 			f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'hex' ) )
112 | 			# for debugging/sanity check..
113 | 			if (DEBUG):	
114 | 				f.writerow( ["/*############ DEBUG ############"] )
115 | 				f.writerow( build_data_line( 'test_input', input_vec, i, 'float' ) )
116 | 				f.writerow( build_data_line( 'test_label', input_vec[label], i/VECTOR_LENGTH, 'float' ) )
117 | 				f.writerow( build_data_line( 'test_sub', scaled_input_vec, i, 'float' ) )
118 | 				f.writerow( build_data_line( 'test_exp', exp_vec, i, 'float' ) )
119 | 				f.writerow( build_data_line( 'test_sum', exp_vec_sum, i/VECTOR_LENGTH, 'float' ) )
120 | 				f.writerow( build_data_line( 'test_div', div, i/VECTOR_LENGTH, 'float' ) )
121 | 				f.writerow( build_data_line( 'test_output', output, i/VECTOR_LENGTH, 'float' ) )
122 | 				f.writerow( ["############ END DEBUG ############*/"] )
123 | 
124 | 
125 | 		# end the 'initial begin' statement
126 | 		f.writerow( ['end'] )
127 | 		# add endif statement
128 | 		f.writerow( ['`endif'] )
129 | 
130 | 
131 | if __name__ == '__main__':
132 | 	main()
133 | 


--------------------------------------------------------------------------------
/test/test_data/ip_backward_test_data.vh:
--------------------------------------------------------------------------------
// Test vectors: four groups, each with 8 input words, 8 weight words, one bias
// word (32'h0 in every group) and one expected output word.
// NOTE(review): the include guard is CONV_FORWARD_TEST_H although this file is
// ip_backward_test_data.vh, and ip_forward_test_data.vh uses the same macro --
// confirm whether the guard should be renamed.
// NOTE(review): the layout suggests this file was produced by a generator
// script -- confirm before editing by hand.
`ifndef CONV_FORWARD_TEST_H
`define CONV_FORWARD_TEST_H
reg [31:0] test_input [32];
reg [31:0] test_weights [32];
reg [31:0] test_bias [4];
reg [31:0] test_output [4];
initial begin
test_input[0:7] = '{32'hc26fdfa8, 32'h41ea2b51, 32'h3e756069, 32'hc2bee7c4, 32'h423ca13e, 32'hc267968b, 32'hc2a4d6eb, 32'hbf24c4f5};
test_weights[0:7] = '{32'hc2bd3f2f, 32'h41ef4376, 32'hc27bd7e5, 32'hc1f08333, 32'h42bbe746, 32'hc27ea48c, 32'h426c47d8, 32'h42a66c68};
test_bias[0] = '{32'h0};
test_output[0] = '{32'h4644da95};
test_input[8:15] = '{32'h412d307a, 32'h428e1128, 32'h41c2d156, 32'hc28b75cb, 32'hc2a8e015, 32'hc2be6fb6, 32'h42ae2cc5, 32'h40c8c53b};
test_weights[8:15] = '{32'hc2abe774, 32'hc25839a7, 32'hc22b3847, 32'hc2ae443f, 32'hc23d0c3a, 32'hc229a788, 32'h42b10fa1, 32'hc27d5624};
test_bias[1] = '{32'h0};
test_output[1] = '{32'h4673d400};
test_input[16:23] = '{32'hc14ebce8, 32'h429d3955, 32'h4287bf7d, 32'hc2a29a68, 32'h424818dd, 32'hc24a668e, 32'h42bb185d, 32'hc1d7b44e};
test_weights[16:23] = '{32'hc2a85391, 32'h42354df9, 32'h40af3936, 32'hc239319f, 32'h41e7b5dd, 32'h4231946c, 32'hc2b7509c, 32'h42932f0a};
test_bias[2] = '{32'h0};
test_output[2] = '{32'hc5209a1c};
test_input[24:31] = '{32'hc1f5d9f1, 32'h425b502f, 32'h42a95bb6, 32'hbf8976a8, 32'hc2c6ad6a, 32'h42a94dbc, 32'h42a60f14, 32'hc1f0abe3};
test_weights[24:31] = '{32'hc216c366, 32'h427c6416, 32'h42c1a9d4, 32'h42707064, 32'h42c3c705, 32'h42199cbe, 32'hc2b224ec, 32'h42a226d2};
test_bias[3] = '{32'h0};
test_output[3] = '{32'hc55e307e};
end
`endif
26 | 


--------------------------------------------------------------------------------
/test/test_data/ip_forward_test_data.vh:
--------------------------------------------------------------------------------
// Test vectors: four groups, each with 8 input words, 8 weight words, one bias
// word (32'h0 in every group) and one expected output word.
// NOTE(review): the include guard is CONV_FORWARD_TEST_H although this file is
// ip_forward_test_data.vh, and ip_backward_test_data.vh uses the same macro --
// confirm whether the guard should be renamed.
// NOTE(review): the layout suggests this file was produced by a generator
// script -- confirm before editing by hand.
`ifndef CONV_FORWARD_TEST_H
`define CONV_FORWARD_TEST_H
reg [31:0] test_input [32];
reg [31:0] test_weights [32];
reg [31:0] test_bias [4];
reg [31:0] test_output [4];
initial begin
test_input[0:7] = '{32'hc29babdf, 32'hc273d7f6, 32'h4292f89c, 32'hc2894f57, 32'hc1fdd75c, 32'h42628044, 32'h41e8624d, 32'h424e104f};
test_weights[0:7] = '{32'h4287dc82, 32'hc211106b, 32'hc2658a1c, 32'hc25aa83e, 32'hc2b2814c, 32'hc2c1a613, 32'hc182b02e, 32'hc2b88647};
test_bias[0] = '{32'h0};
test_output[0] = '{32'hc6326b46};
test_input[8:15] = '{32'hc27d80f0, 32'hc189b156, 32'hc1cf120c, 32'hc0b9bfdf, 32'hc2910799, 32'hc1cda669, 32'h427a385c, 32'h42b6a9f0};
test_weights[8:15] = '{32'hc2afd0f5, 32'hbfd0a2b0, 32'hc2afbafe, 32'h41d6b10e, 32'h427ef9d7, 32'h428a8c9e, 32'hc28c551e, 32'h42935a67};
test_bias[1] = '{32'h0};
test_output[1] = '{32'h456466a3};
test_input[16:23] = '{32'h40afcd90, 32'hc1100f6e, 32'hc2050fd3, 32'hc2027ce1, 32'hc1dc4b0c, 32'hc2868392, 32'hc2b219ef, 32'h419723db};
test_weights[16:23] = '{32'hbfb9865b, 32'h4078fde1, 32'hc2900ecd, 32'hc1cffe60, 32'hc1507d6b, 32'h4044d4fe, 32'h41c27277, 32'hc123a934};
test_bias[2] = '{32'h0};
test_output[2] = '{32'h4478dfaa};
test_input[24:31] = '{32'h4086cebe, 32'h419665ce, 32'h40948667, 32'h42061dd8, 32'hc1980fbd, 32'hc2622d0a, 32'h41402536, 32'hc24713c5};
test_weights[24:31] = '{32'hc2334e0f, 32'hc206e7f0, 32'hc2c6b639, 32'h424cb5bd, 32'hc18b3bb8, 32'h428ab0f2, 32'hc2c32273, 32'h4287e1a5};
test_bias[3] = '{32'h0};
test_output[3] = '{32'hc5f0fb62};
end
`endif
26 | 


--------------------------------------------------------------------------------
/test/test_data/relu_forward_test_data.vh:
--------------------------------------------------------------------------------
// ReLU-forward test vectors: four groups of 8 words in test_input with the
// matching expected words in test_output. Every entry whose input has the sign
// bit set has 32'h0 as its expected output; all other entries repeat the input.
// NOTE(review): guard macro, array names and line layout match what
// test/relu_forward_tests_header.py writes -- presumably generated by that
// script; confirm before editing by hand.
`ifndef RELU_FORWARD_TEST_H
`define RELU_FORWARD_TEST_H
reg [31:0] test_input [32];
reg [31:0] test_output [32];
initial begin
test_input[0:7] = '{32'hc24cc7b2, 32'hc03942dd, 32'h42ac2ee9, 32'hc2a124a4, 32'h4284650a, 32'h411f589b, 32'h42c7a9ad, 32'h429d677d};
test_output[0:7] = '{32'h0, 32'h0, 32'h42ac2ee9, 32'h0, 32'h4284650a, 32'h411f589b, 32'h42c7a9ad, 32'h429d677d};
test_input[8:15] = '{32'hc20fb75a, 32'hc1617cb2, 32'hc2af2f38, 32'hc2308ca7, 32'hc1468566, 32'hc2b42a23, 32'hc2419dc9, 32'h42b6ab60};
test_output[8:15] = '{32'h0, 32'h0, 32'h0, 32'h0, 32'h0, 32'h0, 32'h0, 32'h42b6ab60};
test_input[16:23] = '{32'h41d54d29, 32'h4262adb9, 32'h41acdc10, 32'h42a6be27, 32'h42939ccf, 32'h41df8a5d, 32'h424a5b51, 32'hc28aac05};
test_output[16:23] = '{32'h41d54d29, 32'h4262adb9, 32'h41acdc10, 32'h42a6be27, 32'h42939ccf, 32'h41df8a5d, 32'h424a5b51, 32'h0};
test_input[24:31] = '{32'hc25a0698, 32'h42616625, 32'h420abe2d, 32'h429b77b5, 32'h4259205d, 32'h4214999c, 32'hc26a3450, 32'hc29bb946};
test_output[24:31] = '{32'h0, 32'h42616625, 32'h420abe2d, 32'h429b77b5, 32'h4259205d, 32'h4214999c, 32'h0, 32'h0};
end
`endif
16 | 


--------------------------------------------------------------------------------
/tools/caffe_install_deps.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #########################################################
  3 | #
  4 | # Script to install Caffe dependencies 
  5 | # 
  6 | # For each dependency we download the source and compile,
  7 | # and then add the path to the binaries/libraries to 
  8 | # our .bashrc and .cshrc, as well as the path to the 
  9 | # header files and libraries to our Caffe Makefile.config
 10 | #########################################################
 11 | BASE=$PWD
 12 | NUM_CORES=`nproc`
 13 | 
 14 | # YUM deps needed to clone git repos
 15 | #sudo yum install git
 16 | # YUM deps needed for protobuf
 17 | #sudo yum install -y autoconf automake libtool gcc-c++
 18 | # YUM deps needed for hdf5
 19 | #sudo yum install -y zlib zlib-devel
 20 | 
 21 | ##############################################
 22 | ##### download Caffe
 23 | ##############################################
 24 | git clone https://github.com/BVLC/caffe.git 
 25 | cd caffe
 26 | cp Makefile.config.example Makefile.config
 27 | CAFFE_CONFIG=$BASE/caffe/Makefile.config
 28 | echo '# Automatically generated statements' >> $CAFFE_CONFIG
 29 | 
 30 | ##############################################
 31 | ##### Install Depdendencies
 32 | ##############################################
 33 | # create dependencies dir if not exist
 34 | mkdir -p $BASE/dependencies
 35 | 
 36 | ##############################################
 37 | ##### install boost 1.59.0
 38 | ##############################################
 39 | cd $BASE/dependencies
 40 | mkdir -p boost
 41 | cd boost
 42 | wget http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz
 43 | tar -zxvf boost_1_59_0.tar.gz
 44 | cd boost_1_59_0
 45 | ./bootstrap.sh --prefix=${PWD}
 46 | ./b2 install -j${NUM_CORES}
 47 | 
 48 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
 49 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
 50 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
 51 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
 52 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
 53 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
 54 | 
 55 | ##############################################
 56 | ##### install protobuf
 57 | ##############################################
 58 | cd $BASE/dependencies
 59 | # build protobuf
 60 | git clone https://github.com/google/protobuf.git
 61 | cd protobuf
 62 | ./autogen.sh
 63 | ./configure --prefix=${PWD} && make -j${NUM_CORES} && make install
 64 | 
 65 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
 66 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
 67 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
 68 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
 69 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
 70 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
 71 | 
 72 | ##############################################
 73 | ##### install snappy
 74 | ##############################################
 75 | cd $BASE/dependencies
 76 | mkdir -p snappy
 77 | cd snappy
 78 | wget https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz
 79 | tar -xzvf snappy-1.1.1.tar.gz
 80 | cd snappy-1.1.1
 81 | ./configure --prefix=${PWD} && make -j${NUM_CORES} && make install
 82 | 
 83 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
 84 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
 85 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
 86 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
 87 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
 88 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
 89 | 
 90 | ##############################################
 91 | ##### install gflags
 92 | ##############################################
 93 | cd $BASE/dependencies
 94 | mkdir -p gflags
 95 | cd gflags
 96 | wget https://gflags.googlecode.com/files/gflags-2.0-no-svn-files.tar.gz
 97 | tar -xzvf gflags-2.0-no-svn-files.tar.gz
 98 | cd gflags-2.0
 99 | ./configure --prefix=${PWD} && make -j${NUM_CORES} && make install
100 | 
101 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
102 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
103 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
104 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
105 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
106 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
107 | 
108 | ##############################################
109 | ###### install glog
110 | ##############################################
111 | cd $BASE/dependencies
112 | mkdir -p glog
113 | cd glog
114 | wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz
115 | tar zxvf glog-0.3.3.tar.gz
116 | cd glog-0.3.3
117 | ./configure --prefix=${PWD} && make -j${NUM_CORES} && make install
118 | 
119 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
120 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
121 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
122 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
123 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
124 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
125 | 
126 | ##### install lmdb
127 | cd $BASE/dependencies
128 | git clone https://github.com/LMDB/lmdb
129 | cd lmdb/libraries/liblmdb
130 | sed -i 's_/usr/local_._g' Makefile 
131 | make -j${NUM_CORES} && make install
132 | 
133 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
134 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
135 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
136 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
137 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
138 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
139 | 
140 | ##############################################
141 | ##### install hdf5
142 | ##############################################
143 | cd $BASE/dependencies
144 | mkdir -p hdf5
145 | cd hdf5
146 | wget http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.16.tar.gz
147 | tar -zxvf hdf5-1.8.16.tar.gz 
148 | cd hdf5-1.8.16
149 | ./configure --prefix=${PWD} && make -j${NUM_CORES} && make install
150 | 
151 | echo "export LD_LIBRARY_PATH=\"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.bashrc
152 | echo "export PATH=\"${PWD}/bin:"'${PATH}"' >> ~/.bashrc
153 | echo "setenv LD_LIBRARY_PATH \"${PWD}/lib:"'${LD_LIBRARY_PATH}"' >> ~/.cshrc
154 | echo "setenv PATH \"${PWD}/bin:"'${PATH}"' >> ~/.csshrc
155 | echo "LIBRARY_DIRS += ${PWD}/lib" >> $CAFFE_CONFIG
156 | echo "INCLUDE_DIRS += ${PWD}/include" >> $CAFFE_CONFIG
157 | 
158 | echo 
159 | echo "########################################################"
160 | echo "Please source your ~/.bashrc file and/or ~/.cshrc file"
161 | echo "i.e.: source ~/.bashrc "
162 | echo "########################################################"
163 | 
164 | 


--------------------------------------------------------------------------------
/tools/nvidia_smi_command.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh 
2 | 
# Query the Nvidia GPUs for the metrics listed in QUERY_FIELDS.
#   -l 10                  repeat the query every 10 seconds
#   -f gpu_metric_dump.csv write the results to this file
#   --format=csv,nounits   CSV output without unit suffixes
QUERY_FIELDS="timestamp,index,pstate,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,power.draw,power.limit,clocks.gr,clocks.sm,clocks.mem,clocks.applications.gr,clocks.applications.mem,gpu_uuid"

nvidia-smi --query-gpu="$QUERY_FIELDS" \
	-l 10 \
	-f gpu_metric_dump.csv \
	--format=csv,nounits
6 | 


--------------------------------------------------------------------------------