├── 64_4 ag4bits data32bits fixedpt ├── pipeline.v ├── pipeline_power_routed.rpt ├── pipeline_timing_summary_routed.rpt ├── pipeline_utilization_placed.rpt ├── qmaxtable.v ├── qtable.v ├── rtable.v └── testbench.v ├── 64_8 float ├── pipeline.v ├── qmaxtable.v ├── qtable.v └── rtable.v ├── 65536_8 floatpoint ├── all_float │ ├── pipeline.v │ ├── pipeline_power_routed.rpt │ ├── pipeline_timing_summary_routed.rpt │ ├── pipeline_utilization_placed.rpt │ ├── qmaxtable.v │ ├── qtable.v │ └── rtable.v ├── pipeline.v ├── pipeline_power_routed.rpt ├── pipeline_timing_summary_routed.rpt ├── pipeline_utilization_placed.rpt ├── qmaxtable.v ├── qtable.v ├── rtable.v └── testbench.v ├── 65536_8 ├── pipeline.v ├── qmaxtable.v ├── qtable.v ├── rtable.v └── testbench.v ├── LICENSE ├── README.md ├── Screen Shot 2019-09-02 at 2.20.06 AM.png ├── pipeline.v ├── pipeline_power_routed.rpt ├── pipeline_timing_summary_routed.rpt ├── pipeline_utilization_placed.rpt ├── pipqrl.PNG ├── qmaxtable.v ├── qt_mem_init.txt ├── qtable.v ├── rtable.v └── testbench.v /64_4 ag4bits data32bits fixedpt/pipeline.v: -------------------------------------------------------------------------------- 1 | // `timescale 1ns / 1ps 2 | // ////////////////////////////////////////////////////////////////////////////////// 3 | // // Company: 4 | // // Engineer: 5 | // // 6 | // // Create Date: 09/02/2019 10:53:28 AM 7 | // // Design Name: 8 | // // Module Name: pipeline 9 | // // Project Name: 10 | // // Target Devices: 11 | // // Tool Versions: 12 | // // Description: 13 | // // 14 | // // Dependencies: 15 | // // 16 | // // Revision: 17 | // // Revision 0.01 - File Created 18 | // // Additional Comments: 19 | // // 20 | // ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | // //store tables in BRAMs 24 | // //width depends on range of q value, depth depends on number of states times num of actions 25 | 26 | // //The 4-stage pipeline 27 | // //inputs: action 28 | // 
module pipeline #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, DEPTH = 16) ( input clk,input rst, input[1:0] action, output reg[47:0] sum); 29 | 30 | // //used in stage 1 31 | // reg[DATA_WIDTH-1:0] q; //q value 32 | // reg[DATA_WIDTH-1:0] r; //reward 33 | // reg[DATA_WIDTH-1:0] q1; //q value 34 | // reg[DATA_WIDTH-1:0] r1; //reward 35 | // reg[DATA_WIDTH-1:0] qmax; 36 | 37 | // reg[5:0] s; //2^6 possible states (8x8 (x,y) grid, s[5:3]s -> x, s[2:0] -> y) 38 | // reg[7:0] alpha; 39 | // reg[7:0] oneminusa; //1-alpha 40 | // reg[7:0] gamma; 41 | // reg[15:0] ag; //alpha*gamma 42 | 43 | // //propagate for qmax writing address 44 | // reg[5:0] current_s ; 45 | // reg[5:0] current_s1 ; 46 | // reg[5:0] current_s2 ; 47 | // reg[5:0] current_s3 ; 48 | 49 | // //propagate for q writing address 50 | // reg[1:0] current_a ; 51 | // reg[1:0] current_a1 ; 52 | // reg[1:0] current_a2 ; 53 | // reg[1:0] current_a3 ; 54 | 55 | // reg[2:0] sx ; // s[5:3]s -> x, 56 | // reg[2:0] sy ; // s[2:0] -> y) 57 | // reg[5:0] nexts; //next state for state transition 58 | 59 | // //used in stage 2 60 | 61 | // //used in stage 3 62 | // //reg [23:0] sum; 63 | // wire agvalid; 64 | // //used in stage 1 and 4 65 | // //used for q table reading & writing 66 | // reg [ADDR_WIDTH-1:0] addrr_q; 67 | // reg [ADDR_WIDTH-1:0] addrw_q; 68 | // //reg [7:0] addrr_q_tmp; 69 | // //reg [7:0] addr_r_tmp; 70 | // reg rflag_q; //0 or 1 71 | // reg wflag_q; //0 or 1 72 | // reg [DATA_WIDTH-1:0] data_in_q; 73 | // wire [DATA_WIDTH-1:0] data_out_q; 74 | 75 | // //used for qmax table reading & writing 76 | // reg [5:0] addrr_qmax; 77 | // reg [5:0] addrw_qmax; 78 | // reg rflag_qmax; //0 or 1 79 | // reg wflag_qmax; //0 or 1 80 | // reg [DATA_WIDTH-1:0] data_in_qmax; 81 | // wire [DATA_WIDTH-1:0] data_out_qmax; 82 | 83 | // //used for r table reading 84 | // reg [ADDR_WIDTH-1:0] addr_r; 85 | // reg rflag_r; //0 or 1 86 | // wire [DATA_WIDTH-1:0] data_out_r; 87 | // localparam sf = 2.0**-4.0; 88 | // 
//--------------stage 1----------------- 89 | // always @(posedge clk) begin 90 | // //initialize state and action 91 | // if (rst) begin 92 | // s<=6'b000_000; 93 | // current_s<=6'b000000; 94 | // nexts<=6'b000000;; 95 | // alpha<=8'b0000_0010; //0.8 96 | // gamma<=8'b0000_0010; 97 | // end 98 | 99 | // //calculate 1-a and a*g 100 | // //scaling factor=2.0**-4.0 _ 101 | // ag <= alpha*gamma; 102 | // oneminusa <= 8'b0001_0000 - alpha; 103 | 104 | // //locate next state 105 | // sx<=s[5:3];sy<=s[2:0]; 106 | // if (sx==3'b000 && action==2'b00) begin //left wall 107 | // nexts<=s; 108 | // end 109 | // else if (sy==3'b000 && action==2'b01) begin //up wall 110 | // nexts<=s; 111 | // end 112 | // else if (sx==3'b111 && action==2'b10) begin //right wall 113 | // nexts<=s; 114 | // end 115 | // else if (sy==3'b111 && action==2'b11) begin //down wall 116 | // nexts<=s; 117 | // end 118 | // else begin 119 | // case (action) 120 | // 2'b00: nexts<=s-6'b001000;//to the left by 1 121 | // 2'b01: nexts<=s-6'b000001;//to the up by 1 122 | // 2'b10: nexts<=s+6'b001000;//to the right by 1 123 | // 2'b11: nexts<=s+6'b000001;//to the down by 1 124 | // //default: 125 | // endcase 126 | // //nexts<={sx,sy}; 127 | // end 128 | 129 | // //get address for q and r and qmax 130 | // addrr_q<={s,action}; 131 | // addr_r<={s,action}; 132 | // addrr_qmax<=nexts; 133 | 134 | // //wait and transit the state 135 | // current_s<=s; 136 | // current_s1<=current_s; 137 | // current_a<=action; 138 | // current_a1<=current_a; 139 | // s<=nexts; 140 | // end 141 | 142 | 143 | 144 | // //--------------stage 2----------------- 145 | // always @(posedge clk) begin 146 | // //locate q value from q table, save in q register 147 | // // $display("stage 2 s: %06b,current_s: %06b, action:%02b, addrr_q,%08b", s,current_s,action,addrr_q); 148 | // rflag_q<=1; 149 | // q<=data_out_q; 150 | // q1<=q; 151 | 152 | // rflag_r<=1; 153 | // r<=data_out_r; 154 | // r1<=r; 155 | // //locate Qmax at next state from 
Qmax table 156 | 157 | // rflag_qmax<=1; 158 | // qmax<=data_out_qmax; 159 | 160 | // current_s2<=current_s1; 161 | // current_a2<=current_a1; 162 | 163 | // end 164 | 165 | // //--------------stage 3----------------- 166 | // //always @(qmax or r or q or ag or oneminusa) begin 167 | 168 | // /*reg [23:0] sum_part1; 169 | // reg [23:0] sum_part2; 170 | // reg [23:0] sum_part3; 171 | 172 | // always@(posedge clk) 173 | // begin 174 | // sum_part1 <= alpha*r1; 175 | // sum_part2 <= oneminusa*q1; 176 | // sum_part3 <= ag*qmax; 177 | // end 178 | 179 | 180 | // always @(posedge clk) begin 181 | // //calculations of q learning function 182 | // //adder 183 | // sum <= sum_part1 + sum_part2 + sum_part3; 184 | // //$display("stage 3 sum: %04h", sum); 185 | 186 | // current_s3<=current_s2; 187 | // current_a3<=current_a2; 188 | // end*/ 189 | // always @(posedge clk) begin 190 | // //calculations of q learning function 191 | // //adder 192 | // sum <= alpha*r1 + oneminusa*q1 + ag*qmax; 193 | // //sum <= alpha*r1*2**(-4) + oneminusa*q1*2**(-4) + ag*qmax*2**(-8); 194 | // //$display("stage 3 sum: %04h", sum); 195 | 196 | // current_s3<=current_s2; 197 | // current_a3<=current_a2; 198 | 199 | // end 200 | 201 | 202 | 203 | // //--------------stage 4----------------- 204 | // //always @(sum) begin 205 | // always @(posedge clk) begin 206 | // // if(ce) begin 207 | // //write back to qmax table 208 | // if (sum>q)begin 209 | // wflag_qmax<=1; 210 | // addrw_qmax<=current_s3; 211 | // data_in_qmax<=sum; 212 | // //$display("stage 4 update qmax data_in_qmax: %02h", data_in_qmax); 213 | // //$display("stage 4 update qmax addrw_qmax: %06b", addrw_qmax); 214 | // end 215 | // //write back to q table 216 | // wflag_q<=1; 217 | // addrw_q<={current_s3,current_a3}; 218 | // data_in_q<=sum; 219 | // //$display("stage 4 update q data_in_q: %02h", data_in_q); 220 | // //$display("stage 4 update q addrw_q: %08b", addrw_q); 221 | // //stop the pipeline if reached end state 222 | // //if 
(current_s3 == 6'b111111) begin 223 | // // $finish; 224 | // //end 225 | // //end 226 | // end 227 | 228 | // qtable qt0( 229 | // .i_clk(clk), 230 | // .i_rst(rst), 231 | // .i_addr_r(addrr_q), 232 | // .i_addr_w(addrw_q), 233 | // .i_read_en(rflag_q), 234 | // .i_write_en(wflag_q), 235 | // .i_data(data_in_q), 236 | // .o_data(data_out_q)); 237 | 238 | // qmaxtable qmaxt0( 239 | // .i_clk(clk), 240 | // .i_rst(rst), 241 | // .i_addr_r(addrr_qmax), 242 | // .i_addr_w(addrw_qmax), 243 | // .i_read_en(rflag_qmax), 244 | // .i_write_en(wflag_qmax), 245 | // .i_data(data_in_qmax), 246 | // .o_data(data_out_qmax)); 247 | 248 | // rtable rt0( 249 | // .i_clk(clk), 250 | // .i_addr(addr_r), 251 | // .i_read(rflag_r), 252 | // .o_data(data_out_r)); 253 | 254 | // /* floating_point_0 mult ( 255 | // .aclk(clk), // input wire aclk 256 | // .s_axis_a_tvalid(1'b0), // input wire s_axis_a_tvalid 257 | // .s_axis_a_tdata(alpha), // input wire [31 : 0] s_axis_a_tdata 258 | // .s_axis_b_tvalid(1'b0), // input wire s_axis_b_tvalid 259 | // .s_axis_b_tdata(gamma), // input wire [31 : 0] s_axis_b_tdata 260 | // .m_axis_result_tvalid(agvalid), // output wire m_axis_result_tvalid 261 | // .m_axis_result_tdata(ag) // output wire [31 : 0] m_axis_result_tdata 262 | // );*/ 263 | 264 | // endmodule 265 | 266 | `timescale 1ns / 1ps 267 | ////////////////////////////////////////////////////////////////////////////////// 268 | // Company: 269 | // Engineer: 270 | // 271 | // Create Date: 09/02/2019 10:53:28 AM 272 | // Design Name: 273 | // Module Name: pipeline 274 | // Project Name: 275 | // Target Devices: 276 | // Tool Versions: 277 | // Description: 278 | // 279 | // Dependencies: 280 | // 281 | // Revision: 282 | // Revision 0.01 - File Created 283 | // Additional Comments: 284 | // 285 | ////////////////////////////////////////////////////////////////////////////////// 286 | 287 | 288 | //store tables in BRAMs 289 | //width depends on range of q value, depth depends on number of 
states times num of actions 290 | 291 | //The 4-stage pipeline 292 | //inputs: action 293 |
// -----------------------------------------------------------------------------
// pipeline: 4-stage Q-learning update pipeline for an 8x8 grid world
// (64 states x 4 actions).
//
//   clk    - clock, every register is updated on the rising edge
//   rst    - synchronous, active-high reset of the state walker, the learning
//            coefficients and the table write enables
//   action - action applied to the current state
//            (00 = left, 01 = up, 10 = right, 11 = down)
//   sum    - updated Q value produced by stage 3:
//            alpha*r + (1-alpha)*q + alpha*gamma*qmax, in the same scale as the
//            values held in the tables
//
// Parameters: ADDR_WIDTH sizes the Q/R table addresses, DATA_WIDTH sizes the
// table data. DEPTH is not referenced inside this module.
//
// Fixed point: alpha, gamma and oneminusa carry FRAC_BITS (4) fractional bits
// (1.0 == 8'b0001_0000); ag = alpha*gamma therefore carries 2*FRAC_BITS.
//
// NOTE(review): the alignment of the address/data delay chains depends on the
// read latency of qtable/qmaxtable/rtable, which are defined elsewhere --
// confirm that current_s3/current_a3 line up with `sum` at the write-back.
// -----------------------------------------------------------------------------
module pipeline #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, DEPTH = 16) (
    input             clk,
    input             rst,
    input      [1:0]  action,
    output reg [47:0] sum);

    // Number of fractional bits in alpha / gamma / oneminusa.
    localparam       FRAC_BITS = 4;
    // 1.0 in that format.
    localparam [7:0] ONE_Q4_4  = 8'b0001_0000;

    // Values read back from the tables (stage 2) and their delayed copies.
    reg [DATA_WIDTH-1:0] q;     // q value
    reg [DATA_WIDTH-1:0] r;     // reward
    reg [DATA_WIDTH-1:0] q1;    // q delayed by one cycle
    reg [DATA_WIDTH-1:0] r1;    // r delayed by one cycle
    reg [DATA_WIDTH-1:0] qmax;  // Qmax of the next state

    reg [5:0]  s;          // 2^6 states: 8x8 grid, s[5:3] -> x, s[2:0] -> y
    reg [7:0]  alpha;      // learning rate
    reg [7:0]  oneminusa;  // 1 - alpha
    reg [7:0]  gamma;      // discount factor
    reg [15:0] ag;         // alpha * gamma (2*FRAC_BITS fractional bits)

    // State delay chain, used as the qmax write address.
    reg [5:0] current_s;
    reg [5:0] current_s1;
    reg [5:0] current_s2;
    reg [5:0] current_s3;

    // Action delay chain, used (with the state) as the q write address.
    reg [1:0] current_a;
    reg [1:0] current_a1;
    reg [1:0] current_a2;
    reg [1:0] current_a3;

    // Grid coordinates of the CURRENT state. These used to be registers, which
    // made the wall test below look at the previous state's coordinates while
    // the move was applied to the current state (allowing wrap-around moves).
    wire [2:0] sx = s[5:3];
    wire [2:0] sy = s[2:0];
    reg  [5:0] nexts;      // next state for the state transition

    // Only referenced by the commented-out floating point multiplier below.
    wire agvalid;

    // q table read / write interface
    reg  [ADDR_WIDTH-1:0] addrr_q;
    reg  [ADDR_WIDTH-1:0] addrw_q;
    reg                   rflag_q;
    reg                   wflag_q;
    reg  [DATA_WIDTH-1:0] data_in_q;
    wire [DATA_WIDTH-1:0] data_out_q;

    // qmax table read / write interface
    reg  [5:0]            addrr_qmax;
    reg  [5:0]            addrw_qmax;
    reg                   rflag_qmax;
    reg                   wflag_qmax;
    reg  [DATA_WIDTH-1:0] data_in_qmax;
    wire [DATA_WIDTH-1:0] data_out_qmax;

    // reward table read interface
    reg  [ADDR_WIDTH-1:0] addr_r;
    reg                   rflag_r;
    wire [DATA_WIDTH-1:0] data_out_r;

    // High when `action` would move the current state through a grid wall.
    wire hits_wall = (sx == 3'b000 && action == 2'b00)    // left wall
                  || (sy == 3'b000 && action == 2'b01)    // up wall
                  || (sx == 3'b111 && action == 2'b10)    // right wall
                  || (sy == 3'b111 && action == 2'b11);   // down wall

    //--------------stage 1-----------------
    always @(posedge clk) begin
        // derived coefficients: 1-alpha and alpha*gamma
        ag        <= alpha * gamma;
        oneminusa <= ONE_Q4_4 - alpha;

        // read addresses for the q, r and qmax tables
        addrr_q    <= {s, action};
        addr_r     <= {s, action};
        addrr_qmax <= nexts;

        // delay chains that are not part of the reset state
        current_s1 <= current_s;
        current_a  <= action;
        current_a1 <= current_a;

        if (rst) begin
            // Reset and normal operation are mutually exclusive branches.
            // Previously the unconditional assignments to s, current_s and
            // nexts followed the reset block and, being the last nonblocking
            // assignments to those registers, overrode the reset values.
            s         <= 6'b000_000;
            current_s <= 6'b000000;
            nexts     <= 6'b000000;
            alpha     <= 8'b0000_0010; // 0.125 (2/16) with 4 fractional bits
            gamma     <= 8'b0000_0010; // 0.125
        end
        else begin
            // advance the state walker
            current_s <= s;
            s         <= nexts;

            // locate the next state
            if (hits_wall) begin
                nexts <= s;                       // stay in place
            end
            else begin
                case (action)
                    2'b00: nexts <= s - 6'b001000; // to the left by 1
                    2'b01: nexts <= s - 6'b000001; // up by 1
                    2'b10: nexts <= s + 6'b001000; // to the right by 1
                    2'b11: nexts <= s + 6'b000001; // down by 1
                endcase
            end
        end
    end

    //--------------stage 2-----------------
    // Capture the table outputs; q and r get one extra delay register (q1, r1)
    // compared with qmax.
    always @(posedge clk) begin
        rflag_q <= 1;
        q       <= data_out_q;
        q1      <= q;

        rflag_r <= 1;
        r       <= data_out_r;
        r1      <= r;

        rflag_qmax <= 1;
        qmax       <= data_out_qmax;

        current_s2 <= current_s1;
        current_a2 <= current_a1;
    end

    //--------------stage 3-----------------
    // Q-learning update. alpha*r1 and oneminusa*q1 carry FRAC_BITS fractional
    // bits while ag*qmax carries 2*FRAC_BITS, so the first two products are
    // shifted left by FRAC_BITS before the addition and the total is shifted
    // right by 2*FRAC_BITS to return to the scale of the table values. This is
    // the synthesizable form of the originally intended
    //   alpha*r1*2**(-4) + oneminusa*q1*2**(-4) + ag*qmax*2**(-8)
    // (the previous code added the raw products without any alignment).
    // The whole expression is evaluated at the 48-bit width of `sum`.
    always @(posedge clk) begin
        sum <= ((((alpha * r1) + (oneminusa * q1)) << FRAC_BITS) + (ag * qmax))
               >> (2 * FRAC_BITS);

        current_s3 <= current_s2;
        current_a3 <= current_a2;
    end

    //--------------stage 4-----------------
    // Write the new value back to the q table and, when it exceeds q, to the
    // qmax table.
    // NOTE(review): the comparison uses q (not q1 or the stored Qmax of the
    // written state) exactly as before -- confirm this is the intended
    // reference value.
    always @(posedge clk) begin
        if (rst) begin
            // no table writes while in reset
            wflag_q    <= 1'b0;
            wflag_qmax <= 1'b0;
        end
        else begin
            // qmax write enable now follows the comparison instead of
            // staying asserted forever after the first hit
            wflag_qmax <= (sum > q);
            if (sum > q) begin
                addrw_qmax   <= current_s3;
                data_in_qmax <= sum;
            end

            // q table is written every cycle
            wflag_q   <= 1'b1;
            addrw_q   <= {current_s3, current_a3};
            data_in_q <= sum;
        end
        //stop the pipeline if reached end state
        //if (current_s3 == 6'b111111) begin
        //  $finish;
        //end
    end

    qtable qt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_q),
        .i_addr_w(addrw_q),
        .i_read_en(rflag_q),
        .i_write_en(wflag_q),
        .i_data(data_in_q),
        .o_data(data_out_q));

    qmaxtable qmaxt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_qmax),
        .i_addr_w(addrw_qmax),
        .i_read_en(rflag_qmax),
        .i_write_en(wflag_qmax),
        .i_data(data_in_qmax),
        .o_data(data_out_qmax));

    rtable rt0(
        .i_clk(clk),
        .i_addr(addr_r),
        .i_read(rflag_r),
        .o_data(data_out_r));

    /* floating_point_0 mult (
        .aclk(clk),                      // input wire aclk
        .s_axis_a_tvalid(1'b0),          // input wire s_axis_a_tvalid
        .s_axis_a_tdata(alpha),          // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b0),          // input wire s_axis_b_tvalid
        .s_axis_b_tdata(gamma),          // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(agvalid),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(ag)         // output wire [31 : 0] m_axis_result_tdata
    );*/

endmodule
503 | -------------------------------------------------------------------------------- /64_4 ag4bits data32bits fixedpt/pipeline_power_routed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 2 | ---------------------------------------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 17:16:25 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_power -file pipeline_power_routed.rpt -pb pipeline_power_summary_routed.pb -rpx pipeline_power_routed.rpx 7 | | Design : pipeline 8 | | Device : xc7vx485tffg1157-1 9 | | Design State : routed 10 | | Grade : commercial 11 | | Process : typical 12 | | Characterization : Production 13 | ---------------------------------------------------------------------------------------------------------------------------------------------- 14 | 15 | Power Report 16 | 17 | Table of Contents 18 | ----------------- 19 | 1. Summary 20 | 1.1 On-Chip Components 21 | 1.2 Power Supply Summary 22 | 1.3 Confidence Level 23 | 2. Settings 24 | 2.1 Environment 25 | 2.2 Clock Constraints 26 | 3. Detailed Reports 27 | 3.1 By Hierarchy 28 | 29 | 1. 
Summary 30 | ---------- 31 | 32 | +--------------------------+--------------+ 33 | | Total On-Chip Power (W) | 0.298 | 34 | | Design Power Budget (W) | Unspecified* | 35 | | Power Budget Margin (W) | NA | 36 | | Dynamic (W) | 0.055 | 37 | | Device Static (W) | 0.243 | 38 | | Effective TJA (C/W) | 1.4 | 39 | | Max Ambient (C) | 84.6 | 40 | | Junction Temperature (C) | 25.4 | 41 | | Confidence Level | Medium | 42 | | Setting File | --- | 43 | | Simulation Activity File | --- | 44 | | Design Nets Matched | NA | 45 | +--------------------------+--------------+ 46 | * Specify Design Power Budget using, set_operating_conditions -design_power_budget 47 | 48 | 49 | 1.1 On-Chip Components 50 | ---------------------- 51 | 52 | +-------------------------+-----------+----------+-----------+-----------------+ 53 | | On-Chip | Power (W) | Used | Available | Utilization (%) | 54 | +-------------------------+-----------+----------+-----------+-----------------+ 55 | | Clocks | 0.002 | 3 | --- | --- | 56 | | Slice Logic | 0.002 | 417 | --- | --- | 57 | | LUT as Logic | 0.001 | 79 | 303600 | 0.03 | 58 | | Register | <0.001 | 233 | 607200 | 0.04 | 59 | | CARRY4 | <0.001 | 20 | 75900 | 0.03 | 60 | | LUT as Shift Register | <0.001 | 6 | 130800 | <0.01 | 61 | | Others | 0.000 | 10 | --- | --- | 62 | | Signals | 0.004 | 720 | --- | --- | 63 | | Block RAM | 0.007 | 1 | 1030 | 0.10 | 64 | | DSPs | 0.003 | 6 | 2800 | 0.21 | 65 | | I/O | 0.037 | 51 | 600 | 8.50 | 66 | | Static Power | 0.243 | | | | 67 | | Total | 0.298 | | | | 68 | +-------------------------+-----------+----------+-----------+-----------------+ 69 | 70 | 71 | 1.2 Power Supply Summary 72 | ------------------------ 73 | 74 | +-----------+-------------+-----------+-------------+------------+ 75 | | Source | Voltage (V) | Total (A) | Dynamic (A) | Static (A) | 76 | +-----------+-------------+-----------+-------------+------------+ 77 | | Vccint | 1.000 | 0.152 | 0.018 | 0.134 | 78 | | Vccaux | 1.800 | 0.041 | 0.003 | 0.038 | 79 
| | Vcco33 | 3.300 | 0.000 | 0.000 | 0.000 | 80 | | Vcco25 | 2.500 | 0.000 | 0.000 | 0.000 | 81 | | Vcco18 | 1.800 | 0.019 | 0.018 | 0.001 | 82 | | Vcco15 | 1.500 | 0.000 | 0.000 | 0.000 | 83 | | Vcco135 | 1.350 | 0.000 | 0.000 | 0.000 | 84 | | Vcco12 | 1.200 | 0.000 | 0.000 | 0.000 | 85 | | Vccaux_io | 1.800 | 0.000 | 0.000 | 0.000 | 86 | | Vccbram | 1.000 | 0.003 | 0.000 | 0.003 | 87 | | MGTAVcc | 1.000 | 0.000 | 0.000 | 0.000 | 88 | | MGTAVtt | 1.200 | 0.000 | 0.000 | 0.000 | 89 | | MGTVccaux | 1.800 | 0.000 | 0.000 | 0.000 | 90 | | MGTZVccl | 1.075 | 0.000 | 0.000 | 0.000 | 91 | | MGTZAVcc | 1.075 | 0.000 | 0.000 | 0.000 | 92 | | MGTZVcch | 1.800 | 0.000 | 0.000 | 0.000 | 93 | | Vccadc | 1.800 | 0.020 | 0.000 | 0.020 | 94 | +-----------+-------------+-----------+-------------+------------+ 95 | 96 | 97 | 1.3 Confidence Level 98 | -------------------- 99 | 100 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 101 | | User Input Data | Confidence | Details | Action | 102 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 103 | | Design implementation state | High | Design is routed | | 104 | | Clock nodes activity | High | User specified more than 95% of clocks | | 105 | | I/O nodes activity | Medium | More than 5% of inputs are missing user specification | Provide missing input activity with simulation results or by editing the "By Resource Type -> I/Os" view | 106 | | Internal nodes activity | Medium | User specified less than 25% of internal nodes | Provide missing internal nodes activity with simulation results or by editing the "By Resource Type" views | 107 | | Device models | High | Device models are Production | | 108 | | | | | | 109 | | 
Overall confidence level | Medium | | | 110 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 111 | 112 | 113 | 2. Settings 114 | ----------- 115 | 116 | 2.1 Environment 117 | --------------- 118 | 119 | +-----------------------+--------------------------+ 120 | | Ambient Temp (C) | 25.0 | 121 | | ThetaJA (C/W) | 1.4 | 122 | | Airflow (LFM) | 250 | 123 | | Heat Sink | medium (Medium Profile) | 124 | | ThetaSA (C/W) | 2.4 | 125 | | Board Selection | medium (10"x10") | 126 | | # of Board Layers | 12to15 (12 to 15 Layers) | 127 | | Board Temperature (C) | 25.0 | 128 | +-----------------------+--------------------------+ 129 | 130 | 131 | 2.2 Clock Constraints 132 | --------------------- 133 | 134 | +-------+--------+-----------------+ 135 | | Clock | Domain | Constraint (ns) | 136 | +-------+--------+-----------------+ 137 | | clk | clk | 10.0 | 138 | +-------+--------+-----------------+ 139 | 140 | 141 | 3. Detailed Reports 142 | ------------------- 143 | 144 | 3.1 By Hierarchy 145 | ---------------- 146 | 147 | +----------+-----------+ 148 | | Name | Power (W) | 149 | +----------+-----------+ 150 | | pipeline | 0.055 | 151 | | qmaxt0 | 0.004 | 152 | | qt0 | 0.004 | 153 | +----------+-----------+ 154 | 155 | 156 | -------------------------------------------------------------------------------- /64_4 ag4bits data32bits fixedpt/pipeline_utilization_placed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 
2 | ------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 17:15:21 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_utilization -file pipeline_utilization_placed.rpt -pb pipeline_utilization_placed.pb 7 | | Design : pipeline 8 | | Device : 7vx485tffg1157-1 9 | | Design State : Fully Placed 10 | ------------------------------------------------------------------------------------------------------------- 11 | 12 | Utilization Design Information 13 | 14 | Table of Contents 15 | ----------------- 16 | 1. Slice Logic 17 | 1.1 Summary of Registers by Type 18 | 2. Slice Logic Distribution 19 | 3. Memory 20 | 4. DSP 21 | 5. IO and GT Specific 22 | 6. Clocking 23 | 7. Specific Feature 24 | 8. Primitives 25 | 9. Black Boxes 26 | 10. Instantiated Netlists 27 | 28 | 1. Slice Logic 29 | -------------- 30 | 31 | +----------------------------+------+-------+-----------+-------+ 32 | | Site Type | Used | Fixed | Available | Util% | 33 | +----------------------------+------+-------+-----------+-------+ 34 | | Slice LUTs | 85 | 0 | 303600 | 0.03 | 35 | | LUT as Logic | 79 | 0 | 303600 | 0.03 | 36 | | LUT as Memory | 6 | 0 | 130800 | <0.01 | 37 | | LUT as Distributed RAM | 0 | 0 | | | 38 | | LUT as Shift Register | 6 | 0 | | | 39 | | Slice Registers | 233 | 0 | 607200 | 0.04 | 40 | | Register as Flip Flop | 233 | 0 | 607200 | 0.04 | 41 | | Register as Latch | 0 | 0 | 607200 | 0.00 | 42 | | F7 Muxes | 0 | 0 | 151800 | 0.00 | 43 | | F8 Muxes | 0 | 0 | 75900 | 0.00 | 44 | +----------------------------+------+-------+-----------+-------+ 45 | 46 | 47 | 1.1 Summary of Registers by Type 48 | -------------------------------- 49 | 50 | +-------+--------------+-------------+--------------+ 51 | | Total | Clock Enable | Synchronous | Asynchronous | 52 | 
+-------+--------------+-------------+--------------+ 53 | | 0 | _ | - | - | 54 | | 0 | _ | - | Set | 55 | | 0 | _ | - | Reset | 56 | | 0 | _ | Set | - | 57 | | 0 | _ | Reset | - | 58 | | 0 | Yes | - | - | 59 | | 0 | Yes | - | Set | 60 | | 0 | Yes | - | Reset | 61 | | 0 | Yes | Set | - | 62 | | 233 | Yes | Reset | - | 63 | +-------+--------------+-------------+--------------+ 64 | 65 | 66 | 2. Slice Logic Distribution 67 | --------------------------- 68 | 69 | +--------------------------------------------+------+-------+-----------+-------+ 70 | | Site Type | Used | Fixed | Available | Util% | 71 | +--------------------------------------------+------+-------+-----------+-------+ 72 | | Slice | 68 | 0 | 75900 | 0.09 | 73 | | SLICEL | 44 | 0 | | | 74 | | SLICEM | 24 | 0 | | | 75 | | LUT as Logic | 79 | 0 | 303600 | 0.03 | 76 | | using O5 output only | 0 | | | | 77 | | using O6 output only | 12 | | | | 78 | | using O5 and O6 | 67 | | | | 79 | | LUT as Memory | 6 | 0 | 130800 | <0.01 | 80 | | LUT as Distributed RAM | 0 | 0 | | | 81 | | LUT as Shift Register | 6 | 0 | | | 82 | | using O5 output only | 0 | | | | 83 | | using O6 output only | 4 | | | | 84 | | using O5 and O6 | 2 | | | | 85 | | Slice Registers | 233 | 0 | 607200 | 0.04 | 86 | | Register driven from within the Slice | 24 | | | | 87 | | Register driven from outside the Slice | 209 | | | | 88 | | LUT in front of the register is unused | 179 | | | | 89 | | LUT in front of the register is used | 30 | | | | 90 | | Unique Control Sets | 2 | | 75900 | <0.01 | 91 | +--------------------------------------------+------+-------+-----------+-------+ 92 | * Note: Available Control Sets calculated as Slice Registers / 8, Review the Control Sets Report for more information regarding control sets. 93 | 94 | 95 | 3. 
Memory 96 | --------- 97 | 98 | +-------------------+------+-------+-----------+-------+ 99 | | Site Type | Used | Fixed | Available | Util% | 100 | +-------------------+------+-------+-----------+-------+ 101 | | Block RAM Tile | 1 | 0 | 1030 | 0.10 | 102 | | RAMB36/FIFO* | 0 | 0 | 1030 | 0.00 | 103 | | RAMB18 | 2 | 0 | 2060 | 0.10 | 104 | | RAMB18E1 only | 2 | | | | 105 | +-------------------+------+-------+-----------+-------+ 106 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1 107 | 108 | 109 | 4. DSP 110 | ------ 111 | 112 | +----------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------+------+-------+-----------+-------+ 115 | | DSPs | 6 | 0 | 2800 | 0.21 | 116 | | DSP48E1 only | 6 | | | | 117 | +----------------+------+-------+-----------+-------+ 118 | 119 | 120 | 5. 
IO and GT Specific 121 | --------------------- 122 | 123 | +-----------------------------+------+-------+-----------+-------+ 124 | | Site Type | Used | Fixed | Available | Util% | 125 | +-----------------------------+------+-------+-----------+-------+ 126 | | Bonded IOB | 51 | 0 | 600 | 8.50 | 127 | | IOB Master Pads | 25 | | | | 128 | | IOB Slave Pads | 24 | | | | 129 | | Bonded IPADs | 0 | 0 | 62 | 0.00 | 130 | | Bonded OPADs | 0 | 0 | 40 | 0.00 | 131 | | PHY_CONTROL | 0 | 0 | 14 | 0.00 | 132 | | PHASER_REF | 0 | 0 | 14 | 0.00 | 133 | | OUT_FIFO | 0 | 0 | 56 | 0.00 | 134 | | IN_FIFO | 0 | 0 | 56 | 0.00 | 135 | | IDELAYCTRL | 0 | 0 | 14 | 0.00 | 136 | | IBUFDS | 0 | 0 | 576 | 0.00 | 137 | | GTXE2_COMMON | 0 | 0 | 5 | 0.00 | 138 | | GTXE2_CHANNEL | 0 | 0 | 20 | 0.00 | 139 | | PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 56 | 0.00 | 140 | | PHASER_IN/PHASER_IN_PHY | 0 | 0 | 56 | 0.00 | 141 | | IDELAYE2/IDELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 142 | | ODELAYE2/ODELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 143 | | IBUFDS_GTE2 | 0 | 0 | 10 | 0.00 | 144 | | ILOGIC | 0 | 0 | 600 | 0.00 | 145 | | OLOGIC | 0 | 0 | 600 | 0.00 | 146 | +-----------------------------+------+-------+-----------+-------+ 147 | 148 | 149 | 6. Clocking 150 | ----------- 151 | 152 | +------------+------+-------+-----------+-------+ 153 | | Site Type | Used | Fixed | Available | Util% | 154 | +------------+------+-------+-----------+-------+ 155 | | BUFGCTRL | 1 | 0 | 32 | 3.13 | 156 | | BUFIO | 0 | 0 | 56 | 0.00 | 157 | | MMCME2_ADV | 0 | 0 | 14 | 0.00 | 158 | | PLLE2_ADV | 0 | 0 | 14 | 0.00 | 159 | | BUFMRCE | 0 | 0 | 28 | 0.00 | 160 | | BUFHCE | 0 | 0 | 168 | 0.00 | 161 | | BUFR | 0 | 0 | 56 | 0.00 | 162 | +------------+------+-------+-----------+-------+ 163 | 164 | 165 | 7. 
Specific Feature 166 | ------------------- 167 | 168 | +-------------+------+-------+-----------+-------+ 169 | | Site Type | Used | Fixed | Available | Util% | 170 | +-------------+------+-------+-----------+-------+ 171 | | BSCANE2 | 0 | 0 | 4 | 0.00 | 172 | | CAPTUREE2 | 0 | 0 | 1 | 0.00 | 173 | | DNA_PORT | 0 | 0 | 1 | 0.00 | 174 | | EFUSE_USR | 0 | 0 | 1 | 0.00 | 175 | | FRAME_ECCE2 | 0 | 0 | 1 | 0.00 | 176 | | ICAPE2 | 0 | 0 | 2 | 0.00 | 177 | | PCIE_2_1 | 0 | 0 | 4 | 0.00 | 178 | | STARTUPE2 | 0 | 0 | 1 | 0.00 | 179 | | XADC | 0 | 0 | 1 | 0.00 | 180 | +-------------+------+-------+-----------+-------+ 181 | 182 | 183 | 8. Primitives 184 | ------------- 185 | 186 | +----------+------+---------------------+ 187 | | Ref Name | Used | Functional Category | 188 | +----------+------+---------------------+ 189 | | FDRE | 233 | Flop & Latch | 190 | | LUT4 | 71 | LUT | 191 | | OBUF | 48 | IO | 192 | | LUT3 | 47 | LUT | 193 | | LUT2 | 21 | LUT | 194 | | CARRY4 | 20 | CarryLogic | 195 | | SRL16E | 8 | Distributed Memory | 196 | | DSP48E1 | 6 | Block Arithmetic | 197 | | LUT6 | 3 | LUT | 198 | | LUT5 | 3 | LUT | 199 | | IBUF | 3 | IO | 200 | | RAMB18E1 | 2 | Block Memory | 201 | | LUT1 | 1 | LUT | 202 | | BUFG | 1 | Clock | 203 | +----------+------+---------------------+ 204 | 205 | 206 | 9. Black Boxes 207 | -------------- 208 | 209 | +----------+------+ 210 | | Ref Name | Used | 211 | +----------+------+ 212 | 213 | 214 | 10. 
Instantiated Netlists 215 | ------------------------- 216 | 217 | +----------+------+ 218 | | Ref Name | Used | 219 | +----------+------+ 220 | 221 | 222 | -------------------------------------------------------------------------------- /64_4 ag4bits data32bits fixedpt/qmaxtable.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | //qmax values stored on BRAM 3 | //width depends on range of q value, depth depends on number of states 4 | module qmaxtable #(parameter ADDR_WIDTH = 6, DATA_WIDTH = 32, DEPTH = 64) ( 5 | input wire i_clk, 6 | input wire i_rst, 7 | input wire [ADDR_WIDTH-1:0] i_addr_r, 8 | input wire [ADDR_WIDTH-1:0] i_addr_w, 9 | input wire i_read_en, 10 | input wire i_write_en, 11 | input wire [DATA_WIDTH-1:0] i_data, 12 | output reg [DATA_WIDTH-1:0] o_data 13 | //output reg [DATA_WIDTH-1:0] o_data2 14 | ); 15 | integer i; 16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1]; 17 | //initialize the bram: depends on the test case 18 | /*initial begin 19 | memory_array[0] <= 0; 20 | for (i=0;i11 or (8,7)=>10 gets big reward 48 | 8'b110_111_11: o_data<= {DATA_WIDTH{1'b1}}; 49 | 8'b111_110_10: o_data<= {DATA_WIDTH{1'b1}}; 50 | //... depends on the dataset?? 
51 |
52 |
53 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward
54 | endcase
55 | $display("r read %02h from: %08b\n", o_data, i_addr);
56 | end
57 | endmodule
--------------------------------------------------------------------------------
/64_4 ag4bits data32bits fixedpt/testbench.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 09/02/2019 10:56:26 AM
// Design Name:
// Module Name: testbench
// Project Name:
// Target Devices:
// Tool Versions:
// Description: Drives the `pipeline` module with a free-running 100 MHz clock
//              (10 ns period), a synchronous reset held over several rising
//              edges, and a new random 2-bit action every clock period.
//
// Dependencies: pipeline
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//   The file previously contained this module twice (a verbatim copy appended
//   after the first `endmodule`), which is a duplicate module definition.
//   Only one definition is kept.
//
//////////////////////////////////////////////////////////////////////////////////
module testbench( );
    reg i_clk;
    reg i_rst;
    reg [1:0] a;          // action fed to the pipeline
    reg [31:0] al;        // unused: the alpha port on the DUT is commented out
    reg [31:0] ga;        // unused: the gamma port on the DUT is commented out
    wire [47:0] out;      // DUT `sum` output

    initial begin
        i_clk <= 1;
        i_rst <= 1;
        // Give the action a defined value so the DUT never samples X.
        a     <= 2'b00;

        // Hold reset across the rising edges at 0, 10 and 20 ns. Releasing it
        // at 5 ns (as before) meant reset was only visible on the time-0
        // X->1 clock transition.
        #25 i_rst <= 0;

        // New random action once per clock period, changing on falling edges
        // (35 ns, 45 ns, ...), i.e. away from the sampling edge.
        forever begin
            #10 a <= $urandom % 4;
        end
    end

    // Clock: toggles every 5 ns; rising edges at 0, 10, 20, ... ns.
    always begin
        #5 i_clk = ~i_clk;
    end

    pipeline test(
        .clk(i_clk),
        .rst(i_rst),
        .action(a),
        //.alpha(al),
        //.gamma(ga),
        .sum(out));

endmodule
--------------------------------------------------------------------------------
/64_8 float/pipeline.v:
--------------------------------------------------------------------------------
1 | `timescale 1ns / 1ps
2 | //////////////////////////////////////////////////////////////////////////////////
3 | // Company:
4 | // Engineer:
5 | //
6 | // Create Date: 09/02/2019 10:53:28 AM
7 | // Design Name:
8 | // Module Name: pipeline
9 | // Project Name:
10 | // Target Devices:
11 | // Tool Versions:
12 | // Description:
13 | //
14 | // Dependencies:
15 | //
16 | // Revision:
17 | // Revision 0.01 - File Created
18 | // Additional Comments:
19 | //
20 |
//////////////////////////////////////////////////////////////////////////////////


// Q-learning update pipeline for an 8x8 grid world with 8 actions.
//
// The Q, Qmax and reward tables live in the qtable / qmaxtable / rtable
// sub-modules. Table width depends on the range of the Q value; Q/R depth is
// (number of states) x (number of actions), Qmax depth is the number of states.
//
// State encoding: s[5:3] is the x coordinate, s[2:0] is the y coordinate.
// Action encoding (3 bits):
//   000 left      001 left-up    010 up      011 up-right
//   100 right     101 right-down 110 down    111 down-left
//
// Ports:
//   clk    - clock, all registers are rising-edge triggered
//   rst    - synchronous reset, active high
//   action - action taken in the current state
//   sum    - result of stage 3 (the new Q value written back in stage 4)
module pipeline #(parameter ADDR_Q_WIDTH = 9,parameter ADDR_Qmax_WIDTH = 6, DATA_WIDTH = 32) ( input clk,input rst, input[2:0] action, output reg[63:0] sum);

    //used in stage 1
    reg [DATA_WIDTH-1:0] q;         //q value
    reg [DATA_WIDTH-1:0] r;         //reward
    reg [DATA_WIDTH-1:0] q1;        //q value, delayed one cycle
    reg [DATA_WIDTH-1:0] r1;        //reward, delayed one cycle
    reg [DATA_WIDTH-1:0] qmax;

    reg [ADDR_Qmax_WIDTH-1:0] s;    //current state: s[5:3] -> x, s[2:0] -> y
    reg [DATA_WIDTH-1:0] alpha;
    reg [DATA_WIDTH-1:0] gamma;
    reg [DATA_WIDTH-1:0] oneminusa; //1-alpha
    reg [DATA_WIDTH-1:0] ag;        //alpha*gamma
    wire result_tvalid;
    wire result_tvalid2;
    wire result_tvalid3;

    //state delay line, used as the qmax write address
    reg [ADDR_Qmax_WIDTH-1:0] current_s ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s1 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s2 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s3 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s4 ;
    //action delay line, used (with the state) as the q write address
    reg [2:0] current_a ;
    reg [2:0] current_a1 ;
    reg [2:0] current_a2 ;
    reg [2:0] current_a3 ;
    reg [2:0] current_a4 ;

    reg [2:0] sx ;                  // registered copy of s[5:3] (x)
    reg [2:0] sy ;                  // registered copy of s[2:0] (y)
    reg [ADDR_Qmax_WIDTH-1:0] nexts; //next state for state transition

    //used in stage 1 and 4
    //q table reading & writing
    reg [ADDR_Q_WIDTH-1:0] addrr_q;
    reg [ADDR_Q_WIDTH-1:0] addrw_q;
    reg rflag_q;
    reg wflag_q;
    reg [DATA_WIDTH-1:0] data_in_q;
    wire [DATA_WIDTH-1:0] data_out_q;

    //qmax table reading & writing
    reg [ADDR_Qmax_WIDTH-1:0] addrr_qmax;
    reg [ADDR_Qmax_WIDTH-1:0] addrw_qmax;
    reg rflag_qmax;
    reg wflag_qmax;
    reg [DATA_WIDTH-1:0] data_in_qmax;
    wire [DATA_WIDTH-1:0] data_out_qmax;

    //r table reading
    reg [ADDR_Q_WIDTH-1:0] addr_r;
    reg rflag_r;
    wire [DATA_WIDTH-1:0] data_out_r;
    localparam sf = 2.0**-4.0;

    //--------------stage 1-----------------
    // Wall detection.
    // The tests read the live state `s`, not the registered sx/sy copies,
    // which are one cycle behind `s` and therefore describe a different state.
    // Each "which way does this action move" group is parenthesised as a
    // whole: `&&` binds tighter than `||`, so the previous unparenthesised
    // form blocked some actions no matter where the agent was.
    wire at_left_wall  = (s[5:3] == {3{1'b0}});
    wire at_up_wall    = (s[2:0] == {3{1'b0}});
    wire at_right_wall = (s[5:3] == {3{1'b1}});
    wire at_down_wall  = (s[2:0] == {3{1'b1}});

    wire moves_left  = (action==3'b000)||(action==3'b001)||(action==3'b111);
    wire moves_up    = (action==3'b001)||(action==3'b010)||(action==3'b011);
    wire moves_right = (action==3'b011)||(action==3'b100)||(action==3'b101);
    wire moves_down  = (action==3'b101)||(action==3'b110)||(action==3'b111);

    wire blocked = (at_left_wall  && moves_left)  ||
                   (at_up_wall    && moves_up)    ||
                   (at_right_wall && moves_right) ||
                   (at_down_wall  && moves_down);

    always @(posedge clk) begin
        // Constant coefficients in IEEE-754 single precision.
        // The earlier literals held only the 23 mantissa bits (sign and
        // exponent missing), which encode denormals rather than 0.64 / 0.2.
        ag        <= 32'b0_01111110_01000111101011100001011; //0.8*0.8 (0x3F23D70B)
        oneminusa <= 32'b0_01111100_10011001100110011001100; //1-0.8   (0x3E4CCCCC)

        sx <= s[5:3];
        sy <= s[2:0];

        //addresses for the q, r and qmax reads
        addrr_q    <= {s,action};
        addr_r     <= {s,action};
        addrr_qmax <= nexts;

        //delay lines
        current_s1 <= current_s;
        current_a  <= action;
        current_a1 <= current_a;

        if (rst) begin
            // Reset takes priority. Previously these values were overwritten
            // by unconditional assignments later in the same block, so the
            // state registers were never actually reset.
            s         <= {ADDR_Qmax_WIDTH{1'b0}};
            current_s <= {ADDR_Qmax_WIDTH{1'b0}};
            nexts     <= {ADDR_Qmax_WIDTH{1'b0}};
            alpha     <= 32'b00111111010011001100110011001101; //0.8
            gamma     <= 32'b00111111010011001100110011001101; //0.8
        end
        else begin
            //locate next state
            if (blocked) begin
                nexts <= s;                                   //stay in place
            end
            else begin
                case (action)
                    3'b000: nexts<=s-6'b001_000;              //left
                    3'b001: nexts<=s-6'b001_001;              //left-up
                    3'b010: nexts<=s-6'b000_001;              //up
                    3'b011: nexts<=s-6'b000_001+6'b001_000;   //up-right
                    3'b100: nexts<=s+6'b001_000;              //right
                    3'b101: nexts<=s+6'b001_001;              //right-down
                    3'b110: nexts<=s+6'b000_001;              //down
                    3'b111: nexts<=s+6'b000_001-6'b001_000;   //down-left
                endcase
            end

            //wait and transit the state
            current_s <= s;
            s         <= nexts;
        end
    end

    //--------------stage 2-----------------
    // Capture the table outputs and delay q / r by one extra cycle.
    always @(posedge clk) begin
        rflag_q <= 1;
        q       <= data_out_q;
        q1      <= q;

        rflag_r <= 1;
        r       <= data_out_r;
        r1      <= r;

        //Qmax at the next state
        rflag_qmax <= 1;
        qmax       <= data_out_qmax;

        current_s2 <= current_s1;
        current_a2 <= current_a1;
    end

    //--------------stage 3-----------------
    // sum_part1..3 are the outputs of the floating_point_0 instances below
    // (operands alpha,r1 / oneminusa,q1 / ag,qmax).
    wire [31:0] sum_part1;
    wire [31:0] sum_part2;
    wire [31:0] sum_part3;

    always @(posedge clk) begin
        // NOTE(review): this is an integer '+' applied to what appear to be
        // IEEE-754 bit patterns, which does not give a floating-point sum.
        // The all_float variant of this design uses floating_point_1 adder
        // instances instead - confirm which behaviour is intended here.
        sum <= sum_part1 + sum_part2 + sum_part3;

        current_s3 <= current_s2;
        current_a3 <= current_a2;
        current_s4 <= current_s3;
        current_a4 <= current_a3;
    end

    //--------------stage 4-----------------
    // Write the new value back to the q table every cycle, and to the qmax
    // table when it exceeds q.
    // NOTE(review): wflag_qmax is never cleared once set; the qmax table then
    // keeps rewriting the last stored address/data pair.
    always @(posedge clk) begin
        //write back to qmax table
        if (sum>q) begin
            wflag_qmax   <= 1;
            addrw_qmax   <= current_s3;
            data_in_qmax <= sum;
        end
        //write back to q table
        wflag_q   <= 1;
        addrw_q   <= {current_s3,current_a3};
        data_in_q <= sum;
    end

    qtable qt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_q),
        .i_addr_w(addrw_q),
        .i_read_en(rflag_q),
        .i_write_en(wflag_q),
        .i_data(data_in_q),
        .o_data(data_out_q));

    qmaxtable qmaxt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_qmax),
        .i_addr_w(addrw_qmax),
        .i_read_en(rflag_qmax),
        .i_write_en(wflag_qmax),
        .i_data(data_in_qmax),
        .o_data(data_out_qmax));

    rtable rt0(
        .i_clk(clk),
        .i_addr(addr_r),
        .i_read(rflag_r),
        .o_data(data_out_r));

    // floating_point_0: presumably a Xilinx Floating-Point Operator core
    // configured as a multiplier (the replaced code was `alpha*r1` etc.) -
    // TODO confirm against the IP configuration.
    floating_point_0 sum1 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(alpha),                 // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(r1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid),   // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part1)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum2 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(oneminusa),             // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(q1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid2),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part2)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum3 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(ag),                    // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(qmax),                  // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid3),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part3 )        // output wire [31 : 0] m_axis_result_tdata
    );

endmodule
--------------------------------------------------------------------------------
/64_8 float/qmaxtable.v:
--------------------------------------------------------------------------------
1 | `timescale 1ns / 1ps
2 | //qmax values stored on BRAM
3 | //width depends on range of q value, depth depends on number of states
4 | module qmaxtable #(parameter ADDR_WIDTH = 6, DATA_WIDTH = 32, DEPTH = 64) (
5 | input wire i_clk,
6 | input wire i_rst,
7 | input wire [ADDR_WIDTH-1:0] i_addr_r,
8 | input wire
[ADDR_WIDTH-1:0] i_addr_w,
9 | input wire i_read_en,
10 | input wire i_write_en,
11 | input wire [DATA_WIDTH-1:0] i_data,
12 | output reg [DATA_WIDTH-1:0] o_data
13 | //output reg [DATA_WIDTH-1:0] o_data2
14 | );
15 | integer i;
16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1];
17 | //initialize the bram: depends on the test case
18 | /*initial begin
19 | memory_array[0] <= 0;
20 | for (i=0;i11 or (8,7)=>10 gets big reward
53 | 9'b110_111_100: o_data<= 32'b01000011011111110000000000000000; //255
54 | 9'b111_110_110: o_data<= 32'b01000011011111110000000000000000;
55 | 9'b110_110_101: o_data<= 32'b01000011011111110000000000000000;
56 | //... depends on the dataset??
57 |
58 |
59 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward
60 | endcase
61 | //$display("r read %02h from: %08b\n", o_data, i_addr);
62 | end
63 | endmodule
--------------------------------------------------------------------------------
/65536_8 floatpoint/all_float/pipeline.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 09/02/2019 10:53:28 AM
// Design Name:
// Module Name: pipeline
// Project Name:
// Target Devices:
// Tool Versions:
// Description: Q-learning update pipeline for a 256x256 grid world with
//              8 actions.
//
// Dependencies: qtable, qmaxtable, rtable, floating_point_0, floating_point_1
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// The Q, Qmax and reward tables live in the qtable / qmaxtable / rtable
// sub-modules. Table width depends on the range of the Q value; Q/R depth is
// (number of states) x (number of actions), Qmax depth is the number of states.
//
// State encoding: s[15:8] is the x coordinate, s[7:0] is the y coordinate.
// Action encoding (3 bits):
//   000 left      001 left-up    010 up      011 up-right
//   100 right     101 right-down 110 down    111 down-left
//
// Ports:
//   clk    - clock, all registers are rising-edge triggered
//   rst    - synchronous reset, active high
//   action - action taken in the current state
//   sum    - output of the final floating_point_1 instance (sum23); this is
//            the value written back to the tables in stage 4
module pipeline #(parameter ADDR_Q_WIDTH = 19,parameter ADDR_Qmax_WIDTH = 16, DATA_WIDTH = 32) ( input clk,input rst, input[2:0] action, output wire[31:0] sum);

    //used in stage 1
    reg [DATA_WIDTH-1:0] q;         //q value
    reg [DATA_WIDTH-1:0] r;         //reward
    reg [DATA_WIDTH-1:0] q1;        //q value, delayed one cycle
    reg [DATA_WIDTH-1:0] r1;        //reward, delayed one cycle
    reg [DATA_WIDTH-1:0] qmax;

    reg [ADDR_Qmax_WIDTH-1:0] s;    //current state: s[15:8] -> x, s[7:0] -> y
    reg [DATA_WIDTH-1:0] alpha;
    reg [DATA_WIDTH-1:0] gamma;
    reg [DATA_WIDTH-1:0] oneminusa; //1-alpha
    reg [DATA_WIDTH-1:0] ag;        //alpha*gamma
    wire result_tvalid;
    wire result_tvalid2;
    wire result_tvalid3;
    wire result_tvalid12;
    wire result_tvalid123;

    //state delay line, used as the qmax write address
    reg [ADDR_Qmax_WIDTH-1:0] current_s ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s1 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s2 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s3 ;
    reg [ADDR_Qmax_WIDTH-1:0] current_s4 ;
    //action delay line, used (with the state) as the q write address
    reg [2:0] current_a ;
    reg [2:0] current_a1 ;
    reg [2:0] current_a2 ;
    reg [2:0] current_a3 ;
    reg [2:0] current_a4 ;

    reg [7:0] sx ;                  // registered copy of s[15:8] (x)
    reg [7:0] sy ;                  // registered copy of s[7:0]  (y)
    reg [ADDR_Qmax_WIDTH-1:0] nexts; //next state for state transition

    //used in stage 1 and 4
    //q table reading & writing
    reg [ADDR_Q_WIDTH-1:0] addrr_q;
    reg [ADDR_Q_WIDTH-1:0] addrw_q;
    reg rflag_q;
    reg wflag_q;
    reg [DATA_WIDTH-1:0] data_in_q;
    wire [DATA_WIDTH-1:0] data_out_q;

    //qmax table reading & writing
    reg [ADDR_Qmax_WIDTH-1:0] addrr_qmax;
    reg [ADDR_Qmax_WIDTH-1:0] addrw_qmax;
    reg rflag_qmax;
    reg wflag_qmax;
    reg [DATA_WIDTH-1:0] data_in_qmax;
    wire [DATA_WIDTH-1:0] data_out_qmax;

    //r table reading
    reg [ADDR_Q_WIDTH-1:0] addr_r;
    reg rflag_r;
    wire [DATA_WIDTH-1:0] data_out_r;
    localparam sf = 2.0**-4.0;

    //--------------stage 1-----------------
    // Wall detection.
    // The tests read the live state `s`, not the registered sx/sy copies,
    // which are one cycle behind `s` and therefore describe a different state.
    // Each "which way does this action move" group is parenthesised as a
    // whole: `&&` binds tighter than `||`, so the previous unparenthesised
    // form blocked some actions no matter where the agent was.
    wire at_left_wall  = (s[15:8] == {8{1'b0}});
    wire at_up_wall    = (s[7:0]  == {8{1'b0}});
    wire at_right_wall = (s[15:8] == {8{1'b1}});
    wire at_down_wall  = (s[7:0]  == {8{1'b1}});

    wire moves_left  = (action==3'b000)||(action==3'b001)||(action==3'b111);
    wire moves_up    = (action==3'b001)||(action==3'b010)||(action==3'b011);
    wire moves_right = (action==3'b011)||(action==3'b100)||(action==3'b101);
    wire moves_down  = (action==3'b101)||(action==3'b110)||(action==3'b111);

    wire blocked = (at_left_wall  && moves_left)  ||
                   (at_up_wall    && moves_up)    ||
                   (at_right_wall && moves_right) ||
                   (at_down_wall  && moves_down);

    always @(posedge clk) begin
        // Constant coefficients in IEEE-754 single precision.
        // The earlier literals held only the 23 mantissa bits (sign and
        // exponent missing), which encode denormals rather than 0.64 / 0.2.
        ag        <= 32'b0_01111110_01000111101011100001011; //0.8*0.8 (0x3F23D70B)
        oneminusa <= 32'b0_01111100_10011001100110011001100; //1-0.8   (0x3E4CCCCC)

        sx <= s[15:8];
        sy <= s[7:0];

        //addresses for the q, r and qmax reads
        addrr_q    <= {s,action};
        addr_r     <= {s,action};
        addrr_qmax <= nexts;

        //delay lines
        current_s1 <= current_s;
        current_a  <= action;
        current_a1 <= current_a;

        if (rst) begin
            // Reset takes priority. Previously these values were overwritten
            // by unconditional assignments later in the same block, so the
            // state registers were never actually reset.
            s         <= {ADDR_Qmax_WIDTH{1'b0}};
            current_s <= {ADDR_Qmax_WIDTH{1'b0}};
            nexts     <= {ADDR_Qmax_WIDTH{1'b0}};
            alpha     <= 32'b00111111010011001100110011001101; //0.8
            gamma     <= 32'b00111111010011001100110011001101; //0.8
        end
        else begin
            //locate next state
            if (blocked) begin
                nexts <= s;                                                          //stay in place
            end
            else begin
                case (action)
                    3'b000: nexts<=s-16'b0000_0001_0000_0000;                          //left
                    3'b001: nexts<=s-16'b0000_0001_0000_0001;                          //left-up
                    3'b010: nexts<=s-16'b0000_0000_0000_0001;                          //up
                    3'b011: nexts<=s-16'b0000_0000_0000_0001+16'b0000_0001_0000_0000;  //up-right
                    3'b100: nexts<=s+16'b0000_0001_0000_0000;                          //right
                    3'b101: nexts<=s+16'b0000_0001_0000_0001;                          //right-down
                    3'b110: nexts<=s+16'b0000_0000_0000_0001;                          //down
                    3'b111: nexts<=s+16'b0000_0000_0000_0001-16'b0000_0001_0000_0000;  //down-left
                endcase
            end

            //wait and transit the state
            current_s <= s;
            s         <= nexts;
        end
    end

    //--------------stage 2-----------------
    // Capture the table outputs and delay q / r by one extra cycle.
    always @(posedge clk) begin
        rflag_q <= 1;
        q       <= data_out_q;
        q1      <= q;

        rflag_r <= 1;
        r       <= data_out_r;
        r1      <= r;

        //Qmax at the next state
        rflag_qmax <= 1;
        qmax       <= data_out_qmax;

        current_s2 <= current_s1;
        current_a2 <= current_a1;
    end

    //--------------stage 3-----------------
    // sum_part1..3 come from the floating_point_0 instances below;
    // sum_part12 and `sum` come from the floating_point_1 instances.
    wire [31:0] sum_part1;
    wire [31:0] sum_part2;
    wire [31:0] sum_part12;

    wire [31:0] sum_part3;
    reg  [31:0] sum_part3a;     // sum_part3 delayed by one clock

    wire [31:0] sum_part123;    // declared but not driven in this module

    always @(posedge clk) begin
        // One-cycle delay on the third product before it enters sum23.
        // NOTE(review): presumably this matches the latency of sum12 -
        // confirm against the floating_point_1 IP configuration.
        sum_part3a <= sum_part3;

        current_s3 <= current_s2;
        current_a3 <= current_a2;
        current_s4 <= current_s3;
        current_a4 <= current_a3;
    end

    //--------------stage 4-----------------
    // Write the new value back to the q table every cycle, and to the qmax
    // table when it exceeds q.
    // NOTE(review): `sum>q` is an unsigned integer compare of the raw bit
    // patterns, and wflag_qmax is never cleared once set.
    always @(posedge clk) begin
        //write back to qmax table
        if (sum>q) begin
            wflag_qmax   <= 1;
            addrw_qmax   <= current_s3;
            data_in_qmax <= sum;
        end
        //write back to q table
        wflag_q   <= 1;
        addrw_q   <= {current_s3,current_a3};
        data_in_q <= sum;
    end

    qtable qt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_q),
        .i_addr_w(addrw_q),
        .i_read_en(rflag_q),
        .i_write_en(wflag_q),
        .i_data(data_in_q),
        .o_data(data_out_q));

    qmaxtable qmaxt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_qmax),
        .i_addr_w(addrw_qmax),
        .i_read_en(rflag_qmax),
        .i_write_en(wflag_qmax),
        .i_data(data_in_qmax),
        .o_data(data_out_qmax));

    rtable rt0(
        .i_clk(clk),
        .i_addr(addr_r),
        .i_read(rflag_r),
        .o_data(data_out_r));

    // floating_point_0: presumably a Xilinx Floating-Point Operator core
    // configured as a multiplier (the replaced code was `alpha*r1` etc.) -
    // TODO confirm against the IP configuration.
    floating_point_0 sum1 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(alpha),                 // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(r1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid),   // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part1)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum2 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(oneminusa),             // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(q1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid2),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part2)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum3 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(ag),                    // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(qmax),                  // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid3),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part3 )        // output wire [31 : 0] m_axis_result_tdata
    );

    // floating_point_1: presumably the same IP family configured as an
    // adder (it combines the three products) - TODO confirm.
    floating_point_1 sum12 (
        .aclk(clk),
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(sum_part1),             // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(sum_part2),             // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid12), // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part12)        // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_1 sum23 (
        .aclk(clk),
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(sum_part12),            // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(sum_part3a),            // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid123),// output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum)               // output wire [31 : 0] m_axis_result_tdata
    );

endmodule
-------------------------------------------------------------------------------- /65536_8 floatpoint/all_float/pipeline_power_routed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 2 | ---------------------------------------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sun Sep 15 03:00:14 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_power -file pipeline_power_routed.rpt -pb pipeline_power_summary_routed.pb -rpx pipeline_power_routed.rpx 7 | | Design : pipeline 8 | | Device : xc7vx485tffg1157-1 9 | | Design State : routed 10 | | Grade : commercial 11 | | Process : typical 12 | | Characterization : Production 13 | ---------------------------------------------------------------------------------------------------------------------------------------------- 14 | 15 | Power Report 16 | 17 | Table of Contents 18 | ----------------- 19 | 1. Summary 20 | 1.1 On-Chip Components 21 | 1.2 Power Supply Summary 22 | 1.3 Confidence Level 23 | 2. Settings 24 | 2.1 Environment 25 | 2.2 Clock Constraints 26 | 3. Detailed Reports 27 | 3.1 By Hierarchy 28 | 29 | 1. 
Summary 30 | ---------- 31 | 32 | +--------------------------+--------------+ 33 | | Total On-Chip Power (W) | 0.570 | 34 | | Design Power Budget (W) | Unspecified* | 35 | | Power Budget Margin (W) | NA | 36 | | Dynamic (W) | 0.290 | 37 | | Device Static (W) | 0.279 | 38 | | Effective TJA (C/W) | 1.4 | 39 | | Max Ambient (C) | 84.2 | 40 | | Junction Temperature (C) | 25.8 | 41 | | Confidence Level | Medium | 42 | | Setting File | --- | 43 | | Simulation Activity File | --- | 44 | | Design Nets Matched | NA | 45 | +--------------------------+--------------+ 46 | * Specify Design Power Budget using, set_operating_conditions -design_power_budget 47 | 48 | 49 | 1.1 On-Chip Components 50 | ---------------------- 51 | 52 | +-------------------------+-----------+----------+-----------+-----------------+ 53 | | On-Chip | Power (W) | Used | Available | Utilization (%) | 54 | +-------------------------+-----------+----------+-----------+-----------------+ 55 | | Clocks | 0.018 | 3 | --- | --- | 56 | | Slice Logic | 0.008 | 3713 | --- | --- | 57 | | LUT as Logic | 0.007 | 1799 | 303600 | 0.59 | 58 | | Register | <0.001 | 1486 | 607200 | 0.24 | 59 | | CARRY4 | <0.001 | 86 | 75900 | 0.11 | 60 | | LUT as Shift Register | <0.001 | 13 | 130800 | <0.01 | 61 | | F7/F8 Muxes | <0.001 | 36 | 303600 | 0.01 | 62 | | Others | 0.000 | 156 | --- | --- | 63 | | Signals | 0.077 | 4618 | --- | --- | 64 | | Block RAM | 0.175 | 576 | 1030 | 55.92 | 65 | | DSPs | 0.003 | 9 | 2800 | 0.32 | 66 | | I/O | 0.010 | 36 | 600 | 6.00 | 67 | | Static Power | 0.279 | | | | 68 | | Total | 0.570 | | | | 69 | +-------------------------+-----------+----------+-----------+-----------------+ 70 | 71 | 72 | 1.2 Power Supply Summary 73 | ------------------------ 74 | 75 | +-----------+-------------+-----------+-------------+------------+ 76 | | Source | Voltage (V) | Total (A) | Dynamic (A) | Static (A) | 77 | +-----------+-------------+-----------+-------------+------------+ 78 | | Vccint | 1.000 | 0.417 | 0.266 
| 0.151 | 79 | | Vccaux | 1.800 | 0.038 | 0.001 | 0.038 | 80 | | Vcco33 | 3.300 | 0.000 | 0.000 | 0.000 | 81 | | Vcco25 | 2.500 | 0.000 | 0.000 | 0.000 | 82 | | Vcco18 | 1.800 | 0.006 | 0.005 | 0.001 | 83 | | Vcco15 | 1.500 | 0.000 | 0.000 | 0.000 | 84 | | Vcco135 | 1.350 | 0.000 | 0.000 | 0.000 | 85 | | Vcco12 | 1.200 | 0.000 | 0.000 | 0.000 | 86 | | Vccaux_io | 1.800 | 0.000 | 0.000 | 0.000 | 87 | | Vccbram | 1.000 | 0.037 | 0.015 | 0.023 | 88 | | MGTAVcc | 1.000 | 0.000 | 0.000 | 0.000 | 89 | | MGTAVtt | 1.200 | 0.000 | 0.000 | 0.000 | 90 | | MGTVccaux | 1.800 | 0.000 | 0.000 | 0.000 | 91 | | MGTZVccl | 1.075 | 0.000 | 0.000 | 0.000 | 92 | | MGTZAVcc | 1.075 | 0.000 | 0.000 | 0.000 | 93 | | MGTZVcch | 1.800 | 0.000 | 0.000 | 0.000 | 94 | | Vccadc | 1.800 | 0.020 | 0.000 | 0.020 | 95 | +-----------+-------------+-----------+-------------+------------+ 96 | 97 | 98 | 1.3 Confidence Level 99 | -------------------- 100 | 101 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 102 | | User Input Data | Confidence | Details | Action | 103 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 104 | | Design implementation state | High | Design is routed | | 105 | | Clock nodes activity | High | User specified more than 95% of clocks | | 106 | | I/O nodes activity | Medium | More than 5% of inputs are missing user specification | Provide missing input activity with simulation results or by editing the "By Resource Type -> I/Os" view | 107 | | Internal nodes activity | Medium | User specified less than 25% of internal nodes | Provide missing internal nodes activity with simulation results or by editing the "By Resource Type" views | 108 | | Device models | 
High | Device models are Production | | 109 | | | | | | 110 | | Overall confidence level | Medium | | | 111 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 112 | 113 | 114 | 2. Settings 115 | ----------- 116 | 117 | 2.1 Environment 118 | --------------- 119 | 120 | +-----------------------+--------------------------+ 121 | | Ambient Temp (C) | 25.0 | 122 | | ThetaJA (C/W) | 1.4 | 123 | | Airflow (LFM) | 250 | 124 | | Heat Sink | medium (Medium Profile) | 125 | | ThetaSA (C/W) | 2.4 | 126 | | Board Selection | medium (10"x10") | 127 | | # of Board Layers | 12to15 (12 to 15 Layers) | 128 | | Board Temperature (C) | 25.0 | 129 | +-----------------------+--------------------------+ 130 | 131 | 132 | 2.2 Clock Constraints 133 | --------------------- 134 | 135 | +-------+--------+-----------------+ 136 | | Clock | Domain | Constraint (ns) | 137 | +-------+--------+-----------------+ 138 | | clk | clk | 20.0 | 139 | +-------+--------+-----------------+ 140 | 141 | 142 | 3. 
Detailed Reports 143 | ------------------- 144 | 145 | 3.1 By Hierarchy 146 | ---------------- 147 | 148 | +---------------+-----------+ 149 | | Name | Power (W) | 150 | +---------------+-----------+ 151 | | pipeline | 0.290 | 152 | | qmaxt0 | 0.090 | 153 | | qt0 | 0.106 | 154 | | sum12 | 0.003 | 155 | | U0 | 0.003 | 156 | | i_synth | 0.003 | 157 | | sum2 | 0.002 | 158 | | U0 | 0.002 | 159 | | i_synth | 0.002 | 160 | | sum23 | 0.005 | 161 | | U0 | 0.005 | 162 | | i_synth | 0.005 | 163 | | sum3 | 0.002 | 164 | | U0 | 0.002 | 165 | | i_synth | 0.002 | 166 | +---------------+-----------+ 167 | 168 | 169 | -------------------------------------------------------------------------------- /65536_8 floatpoint/all_float/pipeline_utilization_placed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 2 | ------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sun Sep 15 02:57:59 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_utilization -file pipeline_utilization_placed.rpt -pb pipeline_utilization_placed.pb 7 | | Design : pipeline 8 | | Device : 7vx485tffg1157-1 9 | | Design State : Fully Placed 10 | ------------------------------------------------------------------------------------------------------------- 11 | 12 | Utilization Design Information 13 | 14 | Table of Contents 15 | ----------------- 16 | 1. Slice Logic 17 | 1.1 Summary of Registers by Type 18 | 2. Slice Logic Distribution 19 | 3. Memory 20 | 4. DSP 21 | 5. IO and GT Specific 22 | 6. Clocking 23 | 7. Specific Feature 24 | 8. Primitives 25 | 9. Black Boxes 26 | 10. Instantiated Netlists 27 | 28 | 1. 
Slice Logic 29 | -------------- 30 | 31 | +----------------------------+------+-------+-----------+-------+ 32 | | Site Type | Used | Fixed | Available | Util% | 33 | +----------------------------+------+-------+-----------+-------+ 34 | | Slice LUTs | 1812 | 0 | 303600 | 0.60 | 35 | | LUT as Logic | 1799 | 0 | 303600 | 0.59 | 36 | | LUT as Memory | 13 | 0 | 130800 | <0.01 | 37 | | LUT as Distributed RAM | 0 | 0 | | | 38 | | LUT as Shift Register | 13 | 0 | | | 39 | | Slice Registers | 1486 | 0 | 607200 | 0.24 | 40 | | Register as Flip Flop | 1486 | 0 | 607200 | 0.24 | 41 | | Register as Latch | 0 | 0 | 607200 | 0.00 | 42 | | F7 Muxes | 34 | 0 | 151800 | 0.02 | 43 | | F8 Muxes | 2 | 0 | 75900 | <0.01 | 44 | +----------------------------+------+-------+-----------+-------+ 45 | 46 | 47 | 1.1 Summary of Registers by Type 48 | -------------------------------- 49 | 50 | +-------+--------------+-------------+--------------+ 51 | | Total | Clock Enable | Synchronous | Asynchronous | 52 | +-------+--------------+-------------+--------------+ 53 | | 0 | _ | - | - | 54 | | 0 | _ | - | Set | 55 | | 0 | _ | - | Reset | 56 | | 0 | _ | Set | - | 57 | | 0 | _ | Reset | - | 58 | | 0 | Yes | - | - | 59 | | 0 | Yes | - | Set | 60 | | 24 | Yes | - | Reset | 61 | | 0 | Yes | Set | - | 62 | | 1462 | Yes | Reset | - | 63 | +-------+--------------+-------------+--------------+ 64 | 65 | 66 | 2. 
Slice Logic Distribution 67 | --------------------------- 68 | 69 | +--------------------------------------------+------+-------+-----------+-------+ 70 | | Site Type | Used | Fixed | Available | Util% | 71 | +--------------------------------------------+------+-------+-----------+-------+ 72 | | Slice | 1479 | 0 | 75900 | 1.95 | 73 | | SLICEL | 528 | 0 | | | 74 | | SLICEM | 951 | 0 | | | 75 | | LUT as Logic | 1799 | 0 | 303600 | 0.59 | 76 | | using O5 output only | 0 | | | | 77 | | using O6 output only | 1671 | | | | 78 | | using O5 and O6 | 128 | | | | 79 | | LUT as Memory | 13 | 0 | 130800 | <0.01 | 80 | | LUT as Distributed RAM | 0 | 0 | | | 81 | | LUT as Shift Register | 13 | 0 | | | 82 | | using O5 output only | 0 | | | | 83 | | using O6 output only | 4 | | | | 84 | | using O5 and O6 | 9 | | | | 85 | | Slice Registers | 1486 | 0 | 607200 | 0.24 | 86 | | Register driven from within the Slice | 233 | | | | 87 | | Register driven from outside the Slice | 1253 | | | | 88 | | LUT in front of the register is unused | 1128 | | | | 89 | | LUT in front of the register is used | 125 | | | | 90 | | Unique Control Sets | 10 | | 75900 | 0.01 | 91 | +--------------------------------------------+------+-------+-----------+-------+ 92 | * Note: Available Control Sets calculated as Slice Registers / 8, Review the Control Sets Report for more information regarding control sets. 93 | 94 | 95 | 3. Memory 96 | --------- 97 | 98 | +-------------------+------+-------+-----------+-------+ 99 | | Site Type | Used | Fixed | Available | Util% | 100 | +-------------------+------+-------+-----------+-------+ 101 | | Block RAM Tile | 576 | 0 | 1030 | 55.92 | 102 | | RAMB36/FIFO* | 576 | 0 | 1030 | 55.92 | 103 | | RAMB36E1 only | 576 | | | | 104 | | RAMB18 | 0 | 0 | 2060 | 0.00 | 105 | +-------------------+------+-------+-----------+-------+ 106 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. 
However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1 107 | 108 | 109 | 4. DSP 110 | ------ 111 | 112 | +----------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------+------+-------+-----------+-------+ 115 | | DSPs | 9 | 0 | 2800 | 0.32 | 116 | | DSP48E1 only | 9 | | | | 117 | +----------------+------+-------+-----------+-------+ 118 | 119 | 120 | 5. IO and GT Specific 121 | --------------------- 122 | 123 | +-----------------------------+------+-------+-----------+-------+ 124 | | Site Type | Used | Fixed | Available | Util% | 125 | +-----------------------------+------+-------+-----------+-------+ 126 | | Bonded IOB | 36 | 0 | 600 | 6.00 | 127 | | IOB Master Pads | 17 | | | | 128 | | IOB Slave Pads | 18 | | | | 129 | | Bonded IPADs | 0 | 0 | 62 | 0.00 | 130 | | Bonded OPADs | 0 | 0 | 40 | 0.00 | 131 | | PHY_CONTROL | 0 | 0 | 14 | 0.00 | 132 | | PHASER_REF | 0 | 0 | 14 | 0.00 | 133 | | OUT_FIFO | 0 | 0 | 56 | 0.00 | 134 | | IN_FIFO | 0 | 0 | 56 | 0.00 | 135 | | IDELAYCTRL | 0 | 0 | 14 | 0.00 | 136 | | IBUFDS | 0 | 0 | 576 | 0.00 | 137 | | GTXE2_COMMON | 0 | 0 | 5 | 0.00 | 138 | | GTXE2_CHANNEL | 0 | 0 | 20 | 0.00 | 139 | | PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 56 | 0.00 | 140 | | PHASER_IN/PHASER_IN_PHY | 0 | 0 | 56 | 0.00 | 141 | | IDELAYE2/IDELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 142 | | ODELAYE2/ODELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 143 | | IBUFDS_GTE2 | 0 | 0 | 10 | 0.00 | 144 | | ILOGIC | 0 | 0 | 600 | 0.00 | 145 | | OLOGIC | 0 | 0 | 600 | 0.00 | 146 | +-----------------------------+------+-------+-----------+-------+ 147 | 148 | 149 | 6. 
Clocking 150 | ----------- 151 | 152 | +------------+------+-------+-----------+-------+ 153 | | Site Type | Used | Fixed | Available | Util% | 154 | +------------+------+-------+-----------+-------+ 155 | | BUFGCTRL | 1 | 0 | 32 | 3.13 | 156 | | BUFIO | 0 | 0 | 56 | 0.00 | 157 | | MMCME2_ADV | 0 | 0 | 14 | 0.00 | 158 | | PLLE2_ADV | 0 | 0 | 14 | 0.00 | 159 | | BUFMRCE | 0 | 0 | 28 | 0.00 | 160 | | BUFHCE | 0 | 0 | 168 | 0.00 | 161 | | BUFR | 0 | 0 | 56 | 0.00 | 162 | +------------+------+-------+-----------+-------+ 163 | 164 | 165 | 7. Specific Feature 166 | ------------------- 167 | 168 | +-------------+------+-------+-----------+-------+ 169 | | Site Type | Used | Fixed | Available | Util% | 170 | +-------------+------+-------+-----------+-------+ 171 | | BSCANE2 | 0 | 0 | 4 | 0.00 | 172 | | CAPTUREE2 | 0 | 0 | 1 | 0.00 | 173 | | DNA_PORT | 0 | 0 | 1 | 0.00 | 174 | | EFUSE_USR | 0 | 0 | 1 | 0.00 | 175 | | FRAME_ECCE2 | 0 | 0 | 1 | 0.00 | 176 | | ICAPE2 | 0 | 0 | 2 | 0.00 | 177 | | PCIE_2_1 | 0 | 0 | 4 | 0.00 | 178 | | STARTUPE2 | 0 | 0 | 1 | 0.00 | 179 | | XADC | 0 | 0 | 1 | 0.00 | 180 | +-------------+------+-------+-----------+-------+ 181 | 182 | 183 | 8. Primitives 184 | ------------- 185 | 186 | +----------+------+---------------------+ 187 | | Ref Name | Used | Functional Category | 188 | +----------+------+---------------------+ 189 | | FDRE | 1462 | Flop & Latch | 190 | | LUT4 | 778 | LUT | 191 | | RAMB36E1 | 576 | Block Memory | 192 | | LUT6 | 452 | LUT | 193 | | LUT5 | 373 | LUT | 194 | | LUT3 | 182 | LUT | 195 | | LUT2 | 126 | LUT | 196 | | CARRY4 | 86 | CarryLogic | 197 | | MUXF7 | 34 | MuxFx | 198 | | OBUF | 32 | IO | 199 | | FDCE | 24 | Flop & Latch | 200 | | SRL16E | 22 | Distributed Memory | 201 | | LUT1 | 16 | LUT | 202 | | DSP48E1 | 9 | Block Arithmetic | 203 | | IBUF | 4 | IO | 204 | | MUXF8 | 2 | MuxFx | 205 | | BUFG | 1 | Clock | 206 | +----------+------+---------------------+ 207 | 208 | 209 | 9. 
Black Boxes 210 | -------------- 211 | 212 | +----------+------+ 213 | | Ref Name | Used | 214 | +----------+------+ 215 | 216 | 217 | 10. Instantiated Netlists 218 | ------------------------- 219 | 220 | +------------------+------+ 221 | | Ref Name | Used | 222 | +------------------+------+ 223 | | floating_point_0 | 3 | 224 | | floating_point_1 | 2 | 225 | +------------------+------+ 226 | 227 | 228 | -------------------------------------------------------------------------------- /65536_8 floatpoint/all_float/qmaxtable.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | //qmax values stored on BRAM 3 | //width depends on range of q value, depth depends on number of states 4 | module qmaxtable #(parameter ADDR_WIDTH = 16, DATA_WIDTH = 32, DEPTH = 65536) ( 5 | input wire i_clk, 6 | input wire i_rst, 7 | input wire [ADDR_WIDTH-1:0] i_addr_r, 8 | input wire [ADDR_WIDTH-1:0] i_addr_w, 9 | input wire i_read_en, 10 | input wire i_write_en, 11 | input wire [DATA_WIDTH-1:0] i_data, 12 | output reg [DATA_WIDTH-1:0] o_data 13 | //output reg [DATA_WIDTH-1:0] o_data2 14 | ); 15 | integer i; 16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1]; 17 | //initialize the bram: depends on the test case 18 | /*initial begin 19 | memory_array[0] <= 0; 20 | for (i=0;i11 or (8,7)=>10 gets big reward 53 | 19'b1111_1110_1111_1111_100: o_data<= 32'b01000111100000000000000000000000; //65536 54 | 19'b1111_1111_1111_1110_110: o_data<= 32'b01000111100000000000000000000000; 55 | 19'b1111_1110_1111_1110_101: o_data<= 32'b01000111100000000000000000000000; 56 | //... depends on the dataset?? 
57 | 58 | 59 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward 60 | endcase 61 | //$display("r read %02h from: %08b\n", o_data, i_addr); 62 | end 63 | endmodule 64 | -------------------------------------------------------------------------------- /65536_8 floatpoint/pipeline.v: --------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 09/02/2019 10:53:28 AM
// Design Name:
// Module Name: pipeline
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// Q-learning update pipeline.
//
// State is a 16-bit value {x[7:0], y[7:0]} on a 256x256 grid; `action` selects
// one of 8 moves. Each clock the pipeline
//   stage 1: derives the next state and the read addresses of the Q, R and
//            Qmax tables,
//   stage 2: registers the values read from the tables,
//   stage 3: forms  sum = alpha*r + (1-alpha)*q + alpha*gamma*qmax,
//   stage 4: writes `sum` back to the Q table (and to the Qmax table when it
//            is larger than the registered q).
//
// Tables are stored in BRAMs: width depends on the range of the q value,
// depth on the number of states times the number of actions.
//
// Ports:
//   clk    - clock, all registers are updated on its rising edge
//   rst    - synchronous reset of the state registers and of alpha/gamma
//   action - move selector (000 left, 001 left-up, 010 up, 011 up-right,
//            100 right, 101 right-down, 110 down, 111 down-left)
//   sum    - registered result of stage 3
module pipeline #(parameter ADDR_Q_WIDTH = 19,parameter ADDR_Qmax_WIDTH = 16, DATA_WIDTH = 32) (
    input clk,
    input rst,
    input[2:0] action,
    output reg[63:0] sum);

    // IEEE-754 single-precision constants. The sign and exponent fields must
    // be present: a bare 23-bit mantissa literal zero-extends to a denormal
    // (effectively 0.0), which silently removes that term from the update.
    localparam [31:0] FP_ALPHA       = 32'h3F4CCCCD; // 0.8
    localparam [31:0] FP_GAMMA       = 32'h3F4CCCCD; // 0.8
    localparam [31:0] FP_ALPHA_GAMMA = 32'h3F23D70B; // 0.8 * 0.8
    localparam [31:0] FP_ONE_MINUS_A = 32'h3E4CCCCC; // 1 - 0.8

    //used in stage 1
    reg[DATA_WIDTH-1:0] q;      //q value
    reg[DATA_WIDTH-1:0] r;      //reward
    reg[DATA_WIDTH-1:0] q1;     //q value, delayed by one cycle
    reg[DATA_WIDTH-1:0] r1;     //reward, delayed by one cycle
    reg[DATA_WIDTH-1:0] qmax;

    reg[ADDR_Qmax_WIDTH-1:0] s; //2^16 possible states (256x256 (x,y) grid, s[15:8] -> x, s[7:0] -> y)
    reg[DATA_WIDTH-1:0] alpha;
    reg[DATA_WIDTH-1:0] gamma;
    reg[DATA_WIDTH-1:0] oneminusa; //1-alpha
    reg[DATA_WIDTH-1:0] ag;        //alpha*gamma
    wire result_tvalid;
    wire result_tvalid2;
    wire result_tvalid3;

    //propagate for qmax writing address
    reg[ADDR_Qmax_WIDTH-1:0] current_s ;
    reg[ADDR_Qmax_WIDTH-1:0] current_s1 ;
    reg[ADDR_Qmax_WIDTH-1:0] current_s2 ;
    reg[ADDR_Qmax_WIDTH-1:0] current_s3 ;
    reg[ADDR_Qmax_WIDTH-1:0] current_s4 ;
    //propagate for q writing address
    reg[2:0] current_a ;
    reg[2:0] current_a1 ;
    reg[2:0] current_a2 ;
    reg[2:0] current_a3 ;
    reg[2:0] current_a4 ;

    reg[7:0] sx ; // registered copy of s[15:8] (x), kept for observation only
    reg[7:0] sy ; // registered copy of s[7:0]  (y), kept for observation only
    reg[ADDR_Qmax_WIDTH-1:0] nexts; //next state for state transition

    //used in stage 1 and 4
    //used for q table reading & writing
    reg [ADDR_Q_WIDTH-1:0] addrr_q;
    reg [ADDR_Q_WIDTH-1:0] addrw_q;
    reg rflag_q; //0 or 1
    reg wflag_q; //0 or 1
    reg [DATA_WIDTH-1:0] data_in_q;
    wire [DATA_WIDTH-1:0] data_out_q;

    //used for qmax table reading & writing
    reg [ADDR_Qmax_WIDTH-1:0] addrr_qmax;
    reg [ADDR_Qmax_WIDTH-1:0] addrw_qmax;
    reg rflag_qmax; //0 or 1
    reg wflag_qmax; //0 or 1
    reg [DATA_WIDTH-1:0] data_in_qmax;
    wire [DATA_WIDTH-1:0] data_out_qmax;

    //used for r table reading
    reg [ADDR_Q_WIDTH-1:0] addr_r;
    reg rflag_r; //0 or 1
    wire [DATA_WIDTH-1:0] data_out_r;

    // Not referenced anywhere in this module.
    // NOTE(review): looks like a leftover scaling factor from a fixed-point variant - confirm before removing.
    localparam sf = 2.0**-4.0;

    // Wall detection. It is evaluated on the CURRENT state `s` (not on the
    // registered sx/sy, which lag by one cycle) because the move below is
    // applied to `s`. Each edge test is parenthesised explicitly: `&&` binds
    // tighter than `||`, so without the inner parentheses the action terms
    // would block the move regardless of the position.
    wire [7:0] cur_x = s[15:8];
    wire [7:0] cur_y = s[7:0];

    wire hit_left  = (cur_x == {8{1'b0}}) &&
                     ((action==3'b000) || (action==3'b001) || (action==3'b111));
    wire hit_up    = (cur_y == {8{1'b0}}) &&
                     ((action==3'b001) || (action==3'b010) || (action==3'b011));
    wire hit_right = (cur_x == {8{1'b1}}) &&
                     ((action==3'b011) || (action==3'b100) || (action==3'b101));
    wire hit_down  = (cur_y == {8{1'b1}}) &&
                     ((action==3'b101) || (action==3'b110) || (action==3'b111));
    wire blocked   = hit_left || hit_up || hit_right || hit_down;

    //--------------stage 1-----------------
    always @(posedge clk) begin
        //constants 1-a and a*g (alpha and gamma are fixed, so these are too)
        ag        <= FP_ALPHA_GAMMA;
        oneminusa <= FP_ONE_MINUS_A;

        sx <= cur_x;
        sy <= cur_y;

        //get address for q and r and qmax
        addrr_q    <= {s,action};
        addr_r     <= {s,action};
        addrr_qmax <= nexts;

        //propagate state/action alongside the data for the write-back address
        current_s1 <= current_s;
        current_a  <= action;
        current_a1 <= current_a;

        if (rst) begin
            // Reset and normal operation are mutually exclusive branches: a
            // later non-blocking assignment to the same register in the same
            // block would override the reset value.
            s         <= {ADDR_Qmax_WIDTH{1'b0}};
            current_s <= {ADDR_Qmax_WIDTH{1'b0}};
            nexts     <= {ADDR_Qmax_WIDTH{1'b0}};
            alpha     <= FP_ALPHA;
            gamma     <= FP_GAMMA;
        end
        else begin
            //locate next state
            if (blocked) begin
                nexts <= s; //moving into a wall keeps the state
            end
            else begin
                case (action)
                    3'b000: nexts<=s-16'b0000_0001_0000_0000;//to the left by 1
                    3'b001: nexts<=s-16'b0000_0001_0000_0001;//to the left-up by 1
                    3'b010: nexts<=s-16'b0000_0000_0000_0001;//to the up by 1
                    3'b011: nexts<=s-16'b0000_0000_0000_0001+16'b0000_0001_0000_0000;//to the up-right by 1
                    3'b100: nexts<=s+16'b0000_0001_0000_0000;//to the right by 1
                    3'b101: nexts<=s+16'b0000_0001_0000_0001;//to the right-down by 1
                    3'b110: nexts<=s+16'b0000_0000_0000_0001;//to the down by 1
                    3'b111: nexts<=s+16'b0000_0000_0000_0001-16'b0000_0001_0000_0000;//to the down-left by 1
                    default: nexts<=s; //only reachable with X/Z on action in simulation
                endcase
            end

            //wait and transit the state
            // NOTE(review): both nexts and s are registered, so s takes the
            // value nexts held before this edge - confirm this two-cycle
            // state transition is intended.
            current_s <= s;
            s         <= nexts;
        end
    end

    //--------------stage 2-----------------
    always @(posedge clk) begin
        //locate q value from q table, save in q register
        rflag_q<=1;
        q<=data_out_q;
        q1<=q;

        rflag_r<=1;
        r<=data_out_r;
        r1<=r;

        //locate Qmax at next state from Qmax table
        rflag_qmax<=1;
        qmax<=data_out_qmax;

        current_s2<=current_s1;
        current_a2<=current_a1;
    end

    //--------------stage 3-----------------
    // Outputs of the three floating_point_0 instances below
    // (alpha*r1, oneminusa*q1, ag*qmax).
    wire [31:0] sum_part1;
    wire [31:0] sum_part2;
    wire [31:0] sum_part3;

    always @(posedge clk) begin
        //calculations of q learning function
        // NOTE(review): `+` is an integer addition of the raw 32-bit patterns.
        // If sum_part1..3 are IEEE-754 values (as the constants above suggest)
        // this is not a floating-point sum; a floating-point adder core would
        // be required here - confirm against the all_float variant.
        sum <= sum_part1 + sum_part2 + sum_part3;

        current_s3<=current_s2;
        current_a3<=current_a2;
        current_s4<=current_s3;
        current_a4<=current_a3;
    end

    //--------------stage 4-----------------
    always @(posedge clk) begin
        //write back to qmax table
        // NOTE(review): the comparison is against the registered q, not against
        // the stored qmax of the written state, and `sum` (64 bit) is truncated
        // to DATA_WIDTH on write - confirm both are intended.
        if (sum>q) begin
            wflag_qmax<=1;
            addrw_qmax<=current_s3;
            data_in_qmax<=sum;
        end
        else begin
            // Drop the write enable when there is nothing new to store; the
            // address/data registers keep their last value, so table contents
            // are the same as with a permanently asserted enable.
            wflag_qmax<=0;
        end

        //write back to q table
        // NOTE(review): current_s3/current_a3 are used although *_s4/*_a4
        // exist; correct alignment depends on the latency of the
        // floating_point_0 cores, which is not visible here.
        wflag_q<=1;
        addrw_q<={current_s3,current_a3};
        data_in_q<=sum;
    end

    qtable qt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_q),
        .i_addr_w(addrw_q),
        .i_read_en(rflag_q),
        .i_write_en(wflag_q),
        .i_data(data_in_q),
        .o_data(data_out_q));

    qmaxtable qmaxt0(
        .i_clk(clk),
        .i_rst(rst),
        .i_addr_r(addrr_qmax),
        .i_addr_w(addrw_qmax),
        .i_read_en(rflag_qmax),
        .i_write_en(wflag_qmax),
        .i_data(data_in_qmax),
        .o_data(data_out_qmax));

    rtable rt0(
        .i_clk(clk),
        .i_addr(addr_r),
        .i_read(rflag_r),
        .o_data(data_out_r));

    // floating_point_0 is a generated IP core; presumably configured as a
    // single-precision multiplier - verify against the IP configuration.
    floating_point_0 sum1 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(alpha),                 // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(r1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid),   // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part1)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum2 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(oneminusa),             // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(q1),                    // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid2),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part2)         // output wire [31 : 0] m_axis_result_tdata
    );

    floating_point_0 sum3 (
        .aclk(clk),                             // input wire aclk
        .s_axis_a_tvalid(1'b1),                 // input wire s_axis_a_tvalid
        .s_axis_a_tdata(ag),                    // input wire [31 : 0] s_axis_a_tdata
        .s_axis_b_tvalid(1'b1),                 // input wire s_axis_b_tvalid
        .s_axis_b_tdata(qmax),                  // input wire [31 : 0] s_axis_b_tdata
        .m_axis_result_tvalid(result_tvalid3),  // output wire m_axis_result_tvalid
        .m_axis_result_tdata(sum_part3)         // output wire [31 : 0] m_axis_result_tdata
    );

endmodule
-------------------------------------------------------------------------------- /65536_8 floatpoint/pipeline_power_routed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.
2 | ---------------------------------------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 22:57:37 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_power -file pipeline_power_routed.rpt -pb pipeline_power_summary_routed.pb -rpx pipeline_power_routed.rpx 7 | | Design : pipeline 8 | | Device : xc7vx485tffg1157-1 9 | | Design State : routed 10 | | Grade : commercial 11 | | Process : typical 12 | | Characterization : Production 13 | ---------------------------------------------------------------------------------------------------------------------------------------------- 14 | 15 | Power Report 16 | 17 | Table of Contents 18 | ----------------- 19 | 1. Summary 20 | 1.1 On-Chip Components 21 | 1.2 Power Supply Summary 22 | 1.3 Confidence Level 23 | 2. Settings 24 | 2.1 Environment 25 | 2.2 Clock Constraints 26 | 3. Detailed Reports 27 | 3.1 By Hierarchy 28 | 29 | 1. 
Summary 30 | ---------- 31 | 32 | +--------------------------+--------------+ 33 | | Total On-Chip Power (W) | 0.568 | 34 | | Design Power Budget (W) | Unspecified* | 35 | | Power Budget Margin (W) | NA | 36 | | Dynamic (W) | 0.289 | 37 | | Device Static (W) | 0.279 | 38 | | Effective TJA (C/W) | 1.4 | 39 | | Max Ambient (C) | 84.2 | 40 | | Junction Temperature (C) | 25.8 | 41 | | Confidence Level | Medium | 42 | | Setting File | --- | 43 | | Simulation Activity File | --- | 44 | | Design Nets Matched | NA | 45 | +--------------------------+--------------+ 46 | * Specify Design Power Budget using, set_operating_conditions -design_power_budget 47 | 48 | 49 | 1.1 On-Chip Components 50 | ---------------------- 51 | 52 | +-------------------------+-----------+----------+-----------+-----------------+ 53 | | On-Chip | Power (W) | Used | Available | Utilization (%) | 54 | +-------------------------+-----------+----------+-----------+-----------------+ 55 | | Clocks | 0.019 | 3 | --- | --- | 56 | | Slice Logic | 0.005 | 2775 | --- | --- | 57 | | LUT as Logic | 0.005 | 1160 | 303600 | 0.38 | 58 | | Register | <0.001 | 1426 | 607200 | 0.23 | 59 | | LUT as Shift Register | <0.001 | 13 | 130800 | <0.01 | 60 | | CARRY4 | <0.001 | 24 | 75900 | 0.03 | 61 | | F7/F8 Muxes | <0.001 | 32 | 303600 | 0.01 | 62 | | Others | 0.000 | 42 | --- | --- | 63 | | Signals | 0.078 | 3927 | --- | --- | 64 | | Block RAM | 0.176 | 576 | 1030 | 55.92 | 65 | | DSPs | 0.003 | 9 | 2800 | 0.32 | 66 | | I/O | 0.009 | 68 | 600 | 11.33 | 67 | | Static Power | 0.279 | | | | 68 | | Total | 0.568 | | | | 69 | +-------------------------+-----------+----------+-----------+-----------------+ 70 | 71 | 72 | 1.2 Power Supply Summary 73 | ------------------------ 74 | 75 | +-----------+-------------+-----------+-------------+------------+ 76 | | Source | Voltage (V) | Total (A) | Dynamic (A) | Static (A) | 77 | +-----------+-------------+-----------+-------------+------------+ 78 | | Vccint | 1.000 | 0.416 | 0.265 
| 0.151 | 79 | | Vccaux | 1.800 | 0.038 | 0.001 | 0.038 | 80 | | Vcco33 | 3.300 | 0.000 | 0.000 | 0.000 | 81 | | Vcco25 | 2.500 | 0.000 | 0.000 | 0.000 | 82 | | Vcco18 | 1.800 | 0.005 | 0.004 | 0.001 | 83 | | Vcco15 | 1.500 | 0.000 | 0.000 | 0.000 | 84 | | Vcco135 | 1.350 | 0.000 | 0.000 | 0.000 | 85 | | Vcco12 | 1.200 | 0.000 | 0.000 | 0.000 | 86 | | Vccaux_io | 1.800 | 0.000 | 0.000 | 0.000 | 87 | | Vccbram | 1.000 | 0.037 | 0.015 | 0.023 | 88 | | MGTAVcc | 1.000 | 0.000 | 0.000 | 0.000 | 89 | | MGTAVtt | 1.200 | 0.000 | 0.000 | 0.000 | 90 | | MGTVccaux | 1.800 | 0.000 | 0.000 | 0.000 | 91 | | MGTZVccl | 1.075 | 0.000 | 0.000 | 0.000 | 92 | | MGTZAVcc | 1.075 | 0.000 | 0.000 | 0.000 | 93 | | MGTZVcch | 1.800 | 0.000 | 0.000 | 0.000 | 94 | | Vccadc | 1.800 | 0.020 | 0.000 | 0.020 | 95 | +-----------+-------------+-----------+-------------+------------+ 96 | 97 | 98 | 1.3 Confidence Level 99 | -------------------- 100 | 101 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 102 | | User Input Data | Confidence | Details | Action | 103 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 104 | | Design implementation state | High | Design is routed | | 105 | | Clock nodes activity | High | User specified more than 95% of clocks | | 106 | | I/O nodes activity | Medium | More than 5% of inputs are missing user specification | Provide missing input activity with simulation results or by editing the "By Resource Type -> I/Os" view | 107 | | Internal nodes activity | Medium | User specified less than 25% of internal nodes | Provide missing internal nodes activity with simulation results or by editing the "By Resource Type" views | 108 | | Device models | 
High | Device models are Production | | 109 | | | | | | 110 | | Overall confidence level | Medium | | | 111 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 112 | 113 | 114 | 2. Settings 115 | ----------- 116 | 117 | 2.1 Environment 118 | --------------- 119 | 120 | +-----------------------+--------------------------+ 121 | | Ambient Temp (C) | 25.0 | 122 | | ThetaJA (C/W) | 1.4 | 123 | | Airflow (LFM) | 250 | 124 | | Heat Sink | medium (Medium Profile) | 125 | | ThetaSA (C/W) | 2.4 | 126 | | Board Selection | medium (10"x10") | 127 | | # of Board Layers | 12to15 (12 to 15 Layers) | 128 | | Board Temperature (C) | 25.0 | 129 | +-----------------------+--------------------------+ 130 | 131 | 132 | 2.2 Clock Constraints 133 | --------------------- 134 | 135 | +-------+--------+-----------------+ 136 | | Clock | Domain | Constraint (ns) | 137 | +-------+--------+-----------------+ 138 | | clk | clk | 20.0 | 139 | +-------+--------+-----------------+ 140 | 141 | 142 | 3. Detailed Reports 143 | ------------------- 144 | 145 | 3.1 By Hierarchy 146 | ---------------- 147 | 148 | +---------------+-----------+ 149 | | Name | Power (W) | 150 | +---------------+-----------+ 151 | | pipeline | 0.289 | 152 | | qmaxt0 | 0.089 | 153 | | qt0 | 0.106 | 154 | | sum2 | 0.002 | 155 | | U0 | 0.002 | 156 | | i_synth | 0.002 | 157 | | sum3 | 0.002 | 158 | | U0 | 0.002 | 159 | | i_synth | 0.002 | 160 | +---------------+-----------+ 161 | 162 | 163 | -------------------------------------------------------------------------------- /65536_8 floatpoint/pipeline_utilization_placed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 
2 | ------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 22:55:13 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_utilization -file pipeline_utilization_placed.rpt -pb pipeline_utilization_placed.pb 7 | | Design : pipeline 8 | | Device : 7vx485tffg1157-1 9 | | Design State : Fully Placed 10 | ------------------------------------------------------------------------------------------------------------- 11 | 12 | Utilization Design Information 13 | 14 | Table of Contents 15 | ----------------- 16 | 1. Slice Logic 17 | 1.1 Summary of Registers by Type 18 | 2. Slice Logic Distribution 19 | 3. Memory 20 | 4. DSP 21 | 5. IO and GT Specific 22 | 6. Clocking 23 | 7. Specific Feature 24 | 8. Primitives 25 | 9. Black Boxes 26 | 10. Instantiated Netlists 27 | 28 | 1. Slice Logic 29 | -------------- 30 | 31 | +----------------------------+------+-------+-----------+-------+ 32 | | Site Type | Used | Fixed | Available | Util% | 33 | +----------------------------+------+-------+-----------+-------+ 34 | | Slice LUTs | 1173 | 0 | 303600 | 0.39 | 35 | | LUT as Logic | 1160 | 0 | 303600 | 0.38 | 36 | | LUT as Memory | 13 | 0 | 130800 | <0.01 | 37 | | LUT as Distributed RAM | 0 | 0 | | | 38 | | LUT as Shift Register | 13 | 0 | | | 39 | | Slice Registers | 1426 | 0 | 607200 | 0.23 | 40 | | Register as Flip Flop | 1426 | 0 | 607200 | 0.23 | 41 | | Register as Latch | 0 | 0 | 607200 | 0.00 | 42 | | F7 Muxes | 32 | 0 | 151800 | 0.02 | 43 | | F8 Muxes | 0 | 0 | 75900 | 0.00 | 44 | +----------------------------+------+-------+-----------+-------+ 45 | 46 | 47 | 1.1 Summary of Registers by Type 48 | -------------------------------- 49 | 50 | +-------+--------------+-------------+--------------+ 51 | | Total | Clock Enable | Synchronous | Asynchronous | 52 | 
+-------+--------------+-------------+--------------+ 53 | | 0 | _ | - | - | 54 | | 0 | _ | - | Set | 55 | | 0 | _ | - | Reset | 56 | | 0 | _ | Set | - | 57 | | 0 | _ | Reset | - | 58 | | 0 | Yes | - | - | 59 | | 0 | Yes | - | Set | 60 | | 24 | Yes | - | Reset | 61 | | 0 | Yes | Set | - | 62 | | 1402 | Yes | Reset | - | 63 | +-------+--------------+-------------+--------------+ 64 | 65 | 66 | 2. Slice Logic Distribution 67 | --------------------------- 68 | 69 | +--------------------------------------------+------+-------+-----------+-------+ 70 | | Site Type | Used | Fixed | Available | Util% | 71 | +--------------------------------------------+------+-------+-----------+-------+ 72 | | Slice | 1325 | 0 | 75900 | 1.75 | 73 | | SLICEL | 467 | 0 | | | 74 | | SLICEM | 858 | 0 | | | 75 | | LUT as Logic | 1160 | 0 | 303600 | 0.38 | 76 | | using O5 output only | 0 | | | | 77 | | using O6 output only | 1091 | | | | 78 | | using O5 and O6 | 69 | | | | 79 | | LUT as Memory | 13 | 0 | 130800 | <0.01 | 80 | | LUT as Distributed RAM | 0 | 0 | | | 81 | | LUT as Shift Register | 13 | 0 | | | 82 | | using O5 output only | 0 | | | | 83 | | using O6 output only | 4 | | | | 84 | | using O5 and O6 | 9 | | | | 85 | | Slice Registers | 1426 | 0 | 607200 | 0.23 | 86 | | Register driven from within the Slice | 169 | | | | 87 | | Register driven from outside the Slice | 1257 | | | | 88 | | LUT in front of the register is unused | 1193 | | | | 89 | | LUT in front of the register is used | 64 | | | | 90 | | Unique Control Sets | 8 | | 75900 | 0.01 | 91 | +--------------------------------------------+------+-------+-----------+-------+ 92 | * Note: Available Control Sets calculated as Slice Registers / 8, Review the Control Sets Report for more information regarding control sets. 93 | 94 | 95 | 3. 
Memory 96 | --------- 97 | 98 | +-------------------+------+-------+-----------+-------+ 99 | | Site Type | Used | Fixed | Available | Util% | 100 | +-------------------+------+-------+-----------+-------+ 101 | | Block RAM Tile | 576 | 0 | 1030 | 55.92 | 102 | | RAMB36/FIFO* | 576 | 0 | 1030 | 55.92 | 103 | | RAMB36E1 only | 576 | | | | 104 | | RAMB18 | 0 | 0 | 2060 | 0.00 | 105 | +-------------------+------+-------+-----------+-------+ 106 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1 107 | 108 | 109 | 4. DSP 110 | ------ 111 | 112 | +----------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------+------+-------+-----------+-------+ 115 | | DSPs | 9 | 0 | 2800 | 0.32 | 116 | | DSP48E1 only | 9 | | | | 117 | +----------------+------+-------+-----------+-------+ 118 | 119 | 120 | 5. 
IO and GT Specific 121 | --------------------- 122 | 123 | +-----------------------------+------+-------+-----------+-------+ 124 | | Site Type | Used | Fixed | Available | Util% | 125 | +-----------------------------+------+-------+-----------+-------+ 126 | | Bonded IOB | 68 | 0 | 600 | 11.33 | 127 | | IOB Master Pads | 33 | | | | 128 | | IOB Slave Pads | 32 | | | | 129 | | Bonded IPADs | 0 | 0 | 62 | 0.00 | 130 | | Bonded OPADs | 0 | 0 | 40 | 0.00 | 131 | | PHY_CONTROL | 0 | 0 | 14 | 0.00 | 132 | | PHASER_REF | 0 | 0 | 14 | 0.00 | 133 | | OUT_FIFO | 0 | 0 | 56 | 0.00 | 134 | | IN_FIFO | 0 | 0 | 56 | 0.00 | 135 | | IDELAYCTRL | 0 | 0 | 14 | 0.00 | 136 | | IBUFDS | 0 | 0 | 576 | 0.00 | 137 | | GTXE2_COMMON | 0 | 0 | 5 | 0.00 | 138 | | GTXE2_CHANNEL | 0 | 0 | 20 | 0.00 | 139 | | PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 56 | 0.00 | 140 | | PHASER_IN/PHASER_IN_PHY | 0 | 0 | 56 | 0.00 | 141 | | IDELAYE2/IDELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 142 | | ODELAYE2/ODELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 143 | | IBUFDS_GTE2 | 0 | 0 | 10 | 0.00 | 144 | | ILOGIC | 0 | 0 | 600 | 0.00 | 145 | | OLOGIC | 0 | 0 | 600 | 0.00 | 146 | +-----------------------------+------+-------+-----------+-------+ 147 | 148 | 149 | 6. Clocking 150 | ----------- 151 | 152 | +------------+------+-------+-----------+-------+ 153 | | Site Type | Used | Fixed | Available | Util% | 154 | +------------+------+-------+-----------+-------+ 155 | | BUFGCTRL | 1 | 0 | 32 | 3.13 | 156 | | BUFIO | 0 | 0 | 56 | 0.00 | 157 | | MMCME2_ADV | 0 | 0 | 14 | 0.00 | 158 | | PLLE2_ADV | 0 | 0 | 14 | 0.00 | 159 | | BUFMRCE | 0 | 0 | 28 | 0.00 | 160 | | BUFHCE | 0 | 0 | 168 | 0.00 | 161 | | BUFR | 0 | 0 | 56 | 0.00 | 162 | +------------+------+-------+-----------+-------+ 163 | 164 | 165 | 7. 
Specific Feature 166 | ------------------- 167 | 168 | +-------------+------+-------+-----------+-------+ 169 | | Site Type | Used | Fixed | Available | Util% | 170 | +-------------+------+-------+-----------+-------+ 171 | | BSCANE2 | 0 | 0 | 4 | 0.00 | 172 | | CAPTUREE2 | 0 | 0 | 1 | 0.00 | 173 | | DNA_PORT | 0 | 0 | 1 | 0.00 | 174 | | EFUSE_USR | 0 | 0 | 1 | 0.00 | 175 | | FRAME_ECCE2 | 0 | 0 | 1 | 0.00 | 176 | | ICAPE2 | 0 | 0 | 2 | 0.00 | 177 | | PCIE_2_1 | 0 | 0 | 4 | 0.00 | 178 | | STARTUPE2 | 0 | 0 | 1 | 0.00 | 179 | | XADC | 0 | 0 | 1 | 0.00 | 180 | +-------------+------+-------+-----------+-------+ 181 | 182 | 183 | 8. Primitives 184 | ------------- 185 | 186 | +----------+------+---------------------+ 187 | | Ref Name | Used | Functional Category | 188 | +----------+------+---------------------+ 189 | | FDRE | 1402 | Flop & Latch | 190 | | LUT4 | 629 | LUT | 191 | | RAMB36E1 | 576 | Block Memory | 192 | | LUT5 | 267 | LUT | 193 | | LUT6 | 167 | LUT | 194 | | LUT3 | 111 | LUT | 195 | | OBUF | 64 | IO | 196 | | LUT2 | 47 | LUT | 197 | | MUXF7 | 32 | MuxFx | 198 | | FDCE | 24 | Flop & Latch | 199 | | CARRY4 | 24 | CarryLogic | 200 | | SRL16E | 22 | Distributed Memory | 201 | | DSP48E1 | 9 | Block Arithmetic | 202 | | LUT1 | 8 | LUT | 203 | | IBUF | 4 | IO | 204 | | BUFG | 1 | Clock | 205 | +----------+------+---------------------+ 206 | 207 | 208 | 9. Black Boxes 209 | -------------- 210 | 211 | +----------+------+ 212 | | Ref Name | Used | 213 | +----------+------+ 214 | 215 | 216 | 10. 
Instantiated Netlists 217 | ------------------------- 218 | 219 | +------------------+------+ 220 | | Ref Name | Used | 221 | +------------------+------+ 222 | | floating_point_0 | 3 | 223 | +------------------+------+ 224 | 225 | 226 | -------------------------------------------------------------------------------- /65536_8 floatpoint/qmaxtable.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | //qmax values stored on BRAM 3 | //width depends on range of q value, depth depends on number of states 4 | module qmaxtable #(parameter ADDR_WIDTH = 16, DATA_WIDTH = 32, DEPTH = 65536) ( 5 | input wire i_clk, 6 | input wire i_rst, 7 | input wire [ADDR_WIDTH-1:0] i_addr_r, 8 | input wire [ADDR_WIDTH-1:0] i_addr_w, 9 | input wire i_read_en, 10 | input wire i_write_en, 11 | input wire [DATA_WIDTH-1:0] i_data, 12 | output reg [DATA_WIDTH-1:0] o_data 13 | //output reg [DATA_WIDTH-1:0] o_data2 14 | ); 15 | integer i; 16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1]; 17 | 18 | always @ (posedge i_clk) 19 | begin 20 | /*if (i_rst) begin 21 | memory_array[0] <= 0; 22 | for (i=0;i11 or (8,7)=>10 gets big reward 53 | 19'b1111_1110_1111_1111_100: o_data<= 32'b01000111100000000000000000000000; //65536 54 | 19'b1111_1111_1111_1110_110: o_data<= 32'b01000111100000000000000000000000; 55 | 19'b1111_1110_1111_1110_101: o_data<= 32'b01000111100000000000000000000000; 56 | //... depends on the dataset?? 
57 | 58 | 59 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward 60 | endcase 61 | end 62 | endmodule 63 | -------------------------------------------------------------------------------- /65536_8 floatpoint/testbench.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 09/02/2019 10:56:26 AM 7 | // Design Name: 8 | // Module Name: testbench 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | module testbench( ); 22 | reg i_clk; 23 | reg i_rst; 24 | reg [2:0] a; 25 | reg[31:0] al; //xxxx.xxxx 0000_0010=0.125, fixed point representation for alpha and gamma 26 | reg[31:0] ga; 27 | //reg[7:0] alpha_in; 28 | //reg[7:0] gamma_in; 29 | wire [63:0] out; 30 | 31 | initial begin 32 | i_clk<=1; 33 | //#5 34 | i_rst<=1; 35 | al<=32'b00111101110011001100110011001101; //0.1 36 | ga<=32'b00111101110011001100110011001101; 37 | 38 | #10 i_rst<=0; 39 | forever begin 40 | #20 a<=$urandom%8; 41 | end 42 | //alpha_in=8'b0000_0001; 43 | //gamma_in=8'b0000_0001; 44 | end 45 | 46 | always begin 47 | #10 i_clk=~i_clk; 48 | 49 | end 50 | 51 | pipeline test( 52 | .clk(i_clk), 53 | .rst(i_rst), 54 | .action(a), 55 | //.alpha(al), 56 | //.gamma(ga), 57 | //.cina(ina), 58 | //.cinb(inb), 59 | //.alpha(alpha_in), 60 | //.gamma(gamma_in), 61 | .sum(out)); 62 | 63 | endmodule 64 | -------------------------------------------------------------------------------- /65536_8/pipeline.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 
////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 09/02/2019 10:53:28 AM 7 | // Design Name: 8 | // Module Name: pipeline 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | //store tables in BRAMs 24 | //width depends on range of q value, depth depends on number of states times num of actions 25 | 26 | //The 4-stage pipeline 27 | //inputs: action 28 | module pipeline #(parameter ADDR_Q_WIDTH = 19,parameter ADDR_Qmax_WIDTH = 16, DATA_WIDTH = 32) ( input clk,input rst, input[2:0] action, output reg[95:0] sum); 29 | 30 | //used in stage 1 31 | reg[DATA_WIDTH-1:0] q; //q value 32 | reg[DATA_WIDTH-1:0] r; //reward 33 | reg[DATA_WIDTH-1:0] q1; //q value 34 | reg[DATA_WIDTH-1:0] r1; //reward 35 | reg[DATA_WIDTH-1:0] qmax; 36 | reg[DATA_WIDTH-1:0] oneminusa; //1-alpha 37 | reg[63:0] ag; //alpha*gamma 38 | reg[ADDR_Qmax_WIDTH-1:0] s; //2^6 possible states (8x8 (x,y) grid, s[5:3]s -> x, s[2:0] -> y) 39 | reg[DATA_WIDTH-1:0] alpha; 40 | reg[DATA_WIDTH-1:0] gamma; 41 | 42 | //propagate for qmax writing address 43 | reg[ADDR_Qmax_WIDTH-1:0] current_s ; 44 | reg[ADDR_Qmax_WIDTH-1:0] current_s1 ; 45 | reg[ADDR_Qmax_WIDTH-1:0] current_s2 ; 46 | reg[ADDR_Qmax_WIDTH-1:0] current_s3 ; 47 | 48 | //propagate for q writing address 49 | reg[2:0] current_a ; 50 | reg[2:0] current_a1 ; 51 | reg[2:0] current_a2 ; 52 | reg[2:0] current_a3 ; 53 | 54 | reg[7:0] sx ; // s[15:8]s -> x, 55 | reg[7:0] sy ; // s[7:0] -> y) 56 | reg[ADDR_Qmax_WIDTH-1:0] nexts; //next state for state transition 57 | 58 | //used in stage 2 59 | 60 | //used in stage 3 61 | //reg [23:0] sum; 62 | 63 | //used in stage 1 and 4 64 | //used for q table reading & 
writing 65 | reg [ADDR_Q_WIDTH-1:0] addrr_q; 66 | reg [ADDR_Q_WIDTH-1:0] addrw_q; 67 | //reg [7:0] addrr_q_tmp; 68 | //reg [7:0] addr_r_tmp; 69 | reg rflag_q; //0 or 1 70 | reg wflag_q; //0 or 1 71 | reg [DATA_WIDTH-1:0] data_in_q; 72 | wire [DATA_WIDTH-1:0] data_out_q; 73 | 74 | //used for qmax table reading & writing 75 | reg [ADDR_Qmax_WIDTH-1:0] addrr_qmax; 76 | reg [ADDR_Qmax_WIDTH-1:0] addrw_qmax; 77 | reg rflag_qmax; //0 or 1 78 | reg wflag_qmax; //0 or 1 79 | reg [DATA_WIDTH-1:0] data_in_qmax; 80 | wire [DATA_WIDTH-1:0] data_out_qmax; 81 | 82 | //used for r table reading 83 | reg [ADDR_Q_WIDTH-1:0] addr_r; 84 | reg rflag_r; //0 or 1 85 | wire [DATA_WIDTH-1:0] data_out_r; 86 | localparam sf = 2.0**-4.0; 87 | //--------------stage 1: state transition and table read addresses----------------- 88 | always @(posedge clk) begin 89 | //initialize state and action; reset has priority (if/else) so the state update below cannot override these values 90 | if (rst) begin 91 | s<= {16{1'b0}}; 92 | current_s<={16{1'b0}}; 93 | nexts<={16{1'b0}}; 94 | alpha<={32{1'b0}}; 95 | gamma<={32{1'b0}}; 96 | end else begin 97 | 98 | //calculate 1-a and a*g 99 | //scaling factor=2.0**-4.0 _ 100 | ag <= alpha*gamma; 101 | oneminusa <= 8'b0001_0000 - alpha; 102 | 103 | //locate next state: stay in place when the move would cross a wall. The guards read s directly (sx/sy lag s by one clock) and group the action tests because && binds tighter than || 104 | sx<=s[15:8];sy<=s[7:0]; 105 | if (s[15:8]=={8{1'b0}} && ((action==3'b000)||(action==3'b001)||(action==3'b111))) begin //left wall 106 | nexts<=s; 107 | end 108 | else if (s[7:0]=={8{1'b0}} && ((action==3'b001)||(action==3'b010)||(action==3'b011))) begin //up wall 109 | nexts<=s; 110 | end 111 | else if (s[15:8]=={8{1'b1}} && ((action==3'b011)||(action==3'b100)||(action==3'b101))) begin //right wall 112 | nexts<=s; 113 | end 114 | else if (s[7:0]=={8{1'b1}} && ((action==3'b101)||(action==3'b110)||(action==3'b111))) begin //down wall 115 | nexts<=s; 116 | end 117 | else begin 118 | case (action) 119 | 3'b000: nexts<=s-16'b0000_0001_0000_0000;//to the left by 1 120 | 3'b001: nexts<=s-16'b0000_0001_0000_0001;//to the left-up by 1 121 | 3'b010: nexts<=s-16'b0000_0000_0000_0001;//to the up by 1 122 | 3'b011: 
nexts<=s-16'b0000_0000_0000_0001+16'b0000_0001_0000_0000;//to the up-right by 1 123 | 3'b100: nexts<=s+16'b0000_0001_0000_0000;//to the right by 1 124 | 3'b101: nexts<=s+16'b0000_0001_0000_0001;//to the right-down by 1 125 | 3'b110: nexts<=s+16'b0000_0000_0000_0001;//to the down by 1 126 | 3'b111: nexts<=s+16'b0000_0000_0000_0001-16'b0000_0001_0000_0000;//to the down-left by 1 127 | //default: 128 | endcase 129 | //nexts<={sx,sy}; 130 | end 131 | 132 | //get address for q and r and qmax 133 | addrr_q<={s,action}; 134 | addr_r<={s,action}; 135 | addrr_qmax<=nexts; 136 | //$display("stage 1 s: %06b, action:%02b", s,action); 137 | //$display("stage 1 nexts: %06b", nexts); 138 | //$display("stage 1 addrr_q:%08b, addr_r:%08b, addr_qmax:%06b", addrr_q,addr_r,addrr_qmax); 139 | 140 | //wait and transit the state 141 | current_s<=s; 142 | current_s1<=current_s; 143 | current_a<=action; 144 | current_a1<=current_a; 145 | s<=nexts; 146 | end 147 | end 148 | 149 | 150 | //--------------stage 2----------------- 151 | always @(posedge clk) begin 152 | //locate q value from q table, save in q register 153 | // $display("stage 2 s: %06b,current_s: %06b, action:%02b, addrr_q,%08b", s,current_s,action,addrr_q); 154 | rflag_q<=1; 155 | q<=data_out_q; 156 | q1<=q; 157 | 158 | rflag_r<=1; 159 | r<=data_out_r; 160 | r1<=r; 161 | //$display("stage 2 r1: %02h", r1); 162 | //$display("stage 2 q1: %02h", q1); 163 | //locate Qmax at next state from Qmax table 164 | 165 | rflag_qmax<=1; 166 | qmax<=data_out_qmax; 167 | //$display("stage 2 nexts: %06b", nexts); 168 | //$display("stage 2 addrr_qmax: %06b", addrr_qmax); 169 | //$display("stage 2 qmax: %02h", qmax); 170 | 171 | current_s2<=current_s1; 172 | current_a2<=current_a1; 173 | 174 | end 175 | 176 | //--------------stage 3----------------- 177 | //always @(qmax or r or q or ag or oneminusa) begin 178 | //partial products are sized to the full product width so no bits are dropped before the final add 179 | reg [2*DATA_WIDTH-1:0] sum_part1; //alpha*r1 180 | reg [2*DATA_WIDTH-1:0] sum_part2; //oneminusa*q1 181 | reg [64+DATA_WIDTH-1:0] sum_part3; //ag (64 bits) * qmax 182 | 183 | always@(posedge clk) 184 | begin 185 | 
sum_part1 <= alpha*r1; 186 | sum_part2 <= oneminusa*q1; 187 | sum_part3 <= ag*qmax; 188 | end 189 | 190 | 191 | always @(posedge clk) begin 192 | //calculations of q learning function 193 | //adder 194 | sum <= sum_part1 + sum_part2 + sum_part3; 195 | //$display("stage 3 sum: %04h", sum); 196 | 197 | current_s3<=current_s2; 198 | current_a3<=current_a2; 199 | 200 | end 201 | /*always @(posedge clk) begin 202 | //calculations of q learning function 203 | //adder 204 | sum <= alpha*r1 + oneminusa*q1 + ag*qmax; 205 | //sum <= alpha*r1*2**(-4) + oneminusa*q1*2**(-4) + ag*qmax*2**(-8); 206 | //$display("stage 3 sum: %04h", sum); 207 | 208 | current_s3<=current_s2; 209 | current_a3<=current_a2; 210 | 211 | end */ 212 | 213 | 214 | 215 | //--------------stage 4----------------- 216 | //always @(sum) begin 217 | always @(posedge clk) begin 218 | // if(ce) begin 219 | //write back to qmax table 220 | if (sum>q)begin 221 | wflag_qmax<=1; 222 | addrw_qmax<=current_s3; 223 | data_in_qmax<=sum; 224 | //$display("stage 4 update qmax data_in_qmax: %02h", data_in_qmax); 225 | //$display("stage 4 update qmax addrw_qmax: %06b", addrw_qmax); 226 | end 227 | //write back to q table 228 | wflag_q<=1; 229 | addrw_q<={current_s3,current_a3}; 230 | data_in_q<=sum; 231 | //$display("stage 4 update q data_in_q: %02h", data_in_q); 232 | //$display("stage 4 update q addrw_q: %08b", addrw_q); 233 | //stop the pipeline if reached end state 234 | //if (current_s3 == 6'b111111) begin 235 | // $finish; 236 | //end 237 | //end 238 | end 239 | 240 | qtable qt0( 241 | .i_clk(clk), 242 | .i_rst(rst), 243 | .i_addr_r(addrr_q), 244 | .i_addr_w(addrw_q), 245 | .i_read_en(rflag_q), 246 | .i_write_en(wflag_q), 247 | .i_data(data_in_q), 248 | .o_data(data_out_q)); 249 | 250 | qmaxtable qmaxt0( 251 | .i_clk(clk), 252 | .i_rst(rst), 253 | .i_addr_r(addrr_qmax), 254 | .i_addr_w(addrw_qmax), 255 | .i_read_en(rflag_qmax), 256 | .i_write_en(wflag_qmax), 257 | .i_data(data_in_qmax), 258 | .o_data(data_out_qmax)); 
259 | 260 | rtable rt0( 261 | .i_clk(clk), 262 | .i_addr(addr_r), 263 | .i_read(rflag_r), 264 | .o_data(data_out_r)); 265 | 266 | endmodule -------------------------------------------------------------------------------- /65536_8/qmaxtable.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | //qmax values stored on BRAM 3 | //width depends on range of q value, depth depends on number of states 4 | module qmaxtable #(parameter ADDR_WIDTH = 16, DATA_WIDTH = 32, DEPTH = 65536) ( 5 | input wire i_clk, 6 | input wire i_rst, 7 | input wire [ADDR_WIDTH-1:0] i_addr_r, 8 | input wire [ADDR_WIDTH-1:0] i_addr_w, 9 | input wire i_read_en, 10 | input wire i_write_en, 11 | input wire [DATA_WIDTH-1:0] i_data, 12 | output reg [DATA_WIDTH-1:0] o_data 13 | //output reg [DATA_WIDTH-1:0] o_data2 14 | ); 15 | integer i; 16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1]; 17 | //initialize the bram: depends on the test case 18 | /*initial begin 19 | memory_array[0] <= 0; 20 | for (i=0;i11 or (8,7)=>10 gets big reward 53 | 19'b1111_1110_1111_1111_100: o_data<= {DATA_WIDTH{1'b1}}; 54 | 19'b1111_1111_1111_1110_110: o_data<= {DATA_WIDTH{1'b1}}; 55 | 19'b1111_1110_1111_1110_101: o_data<= {DATA_WIDTH{1'b1}}; 56 | //... depends on the dataset?? 
57 | 58 | 59 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward 60 | endcase 61 | end 62 | endmodule 63 | -------------------------------------------------------------------------------- /65536_8/testbench.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 09/02/2019 10:56:26 AM 7 | // Design Name: 8 | // Module Name: testbench 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | module testbench( ); 22 | reg i_clk; 23 | reg i_rst; 24 | reg [2:0] a; 25 | reg[31:0] al; //xxxx.xxxx 0000_0010=0.125, fixed point representation for alpha and gamma 26 | reg[31:0] ga; 27 | //reg[7:0] alpha_in; 28 | //reg[7:0] gamma_in; 29 | wire [95:0] out; 30 | 31 | initial begin 32 | i_clk<=1; 33 | //#5 34 | i_rst<=1; 35 | al<=8'b0000_0010; 36 | ga<=8'b0000_0010; 37 | 38 | #10 i_rst<=0; 39 | forever begin 40 | #20 a<=$urandom%8; 41 | end 42 | //alpha_in=8'b0000_0001; 43 | //gamma_in=8'b0000_0001; 44 | end 45 | 46 | always begin 47 | #10 i_clk=~i_clk; 48 | 49 | end 50 | 51 | pipeline test( 52 | .clk(i_clk), 53 | .rst(i_rst), 54 | .action(a), 55 | //.alpha(al), 56 | //.gamma(ga), 57 | //.cina(ina), 58 | //.cinb(inb), 59 | //.alpha(alpha_in), 60 | //.gamma(gamma_in), 61 | .sum(out)); 62 | 63 | endmodule 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # q-learning-accel-fpga 2 | 3 | This project implements the pipelined accelerator design for Q-Learning as described in the paper: 4 | ``` 5 | 
@inproceedings{meng2020qtaccel, 6 | title={QTAccel: A Generic FPGA based Design for Q-Table based Reinforcement Learning Accelerators}, 7 | author={Meng, Yuan and Kuppannagari, Sanmukh and Rajat, Rachit and Srivastava, Ajitesh and Kannan, Rajgopal and Prasanna, Viktor}, 8 | booktitle={2020 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)}, 9 | pages={107--114}, 10 | year={2020}, 11 | organization={IEEE} 12 | } 13 | ``` 14 | ![alt text](https://github.com/CatherineMeng/q-learning-accel-fpga/blob/master/pipqrl.PNG) 15 | 16 | # Running Procedure 17 | Step 1: Open the Vivado Design Suite and create a new project. 18 | 19 | Step 2: Add the design source files: pipeline.v, rtable.v, qmaxtable.v, qtable.v 20 | 21 | Step 3: Add the simulation source file: testbench.v 22 | 23 | After completing steps 2 and 3, you should see a source hierarchy like this (file names might be different): ![alt text](https://github.com/CatherineMeng/q-learning-accel-fpga/blob/master/Screen%20Shot%202019-09-02%20at%202.20.06%20AM.png) 24 | 25 | 26 | 27 | Step 4: Click Run Simulation in the left pane to run the simulation. 
Once done, generate RTL design diagram 28 | 29 | -------------------------------------------------------------------------------- /Screen Shot 2019-09-02 at 2.20.06 AM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatherineMeng/q-learning-accel-fpga/d995e208ccfdf84e823339822ee91c5e332af7c4/Screen Shot 2019-09-02 at 2.20.06 AM.png -------------------------------------------------------------------------------- /pipeline.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 09/02/2019 10:53:28 AM 7 | // Design Name: 8 | // Module Name: pipeline 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | //store tables in BRAMs 24 | //width depends on range of q value, depth depends on number of states times num of actions 25 | 26 | //The 4-stage pipeline 27 | //inputs: action 28 | module pipeline #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, DEPTH = 16) ( input clk,input rst, input[1:0] action, output reg[47:0] sum); 29 | 30 | //used in stage 1 31 | reg[DATA_WIDTH-1:0] q; //q value 32 | reg[DATA_WIDTH-1:0] r; //reward 33 | reg[DATA_WIDTH-1:0] q1; //q value 34 | reg[DATA_WIDTH-1:0] r1; //reward 35 | reg[DATA_WIDTH-1:0] qmax; 36 | 37 | reg[5:0] s; //2^6 possible states (8x8 (x,y) grid, s[5:3]s -> x, s[2:0] -> y) 38 | reg[7:0] alpha; 39 | reg[7:0] oneminusa; //1-alpha 40 | reg[7:0] gamma; 41 | reg[15:0] ag; //alpha*gamma 42 | 43 | //propagate for qmax writing address 44 | reg[5:0] current_s ; 45 | reg[5:0] current_s1 ; 46 | reg[5:0] current_s2 
; 47 | reg[5:0] current_s3 ; 48 | 49 | //propagate for q writing address 50 | reg[1:0] current_a ; 51 | reg[1:0] current_a1 ; 52 | reg[1:0] current_a2 ; 53 | reg[1:0] current_a3 ; 54 | 55 | reg[2:0] sx ; // s[5:3]s -> x, 56 | reg[2:0] sy ; // s[2:0] -> y) 57 | reg[5:0] nexts; //next state for state transition 58 | 59 | //used in stage 2 60 | 61 | //used in stage 3 62 | //reg [23:0] sum; 63 | wire agvalid; 64 | //used in stage 1 and 4 65 | //used for q table reading & writing 66 | reg [ADDR_WIDTH-1:0] addrr_q; 67 | reg [ADDR_WIDTH-1:0] addrw_q; 68 | //reg [7:0] addrr_q_tmp; 69 | //reg [7:0] addr_r_tmp; 70 | reg rflag_q; //0 or 1 71 | reg wflag_q; //0 or 1 72 | reg [DATA_WIDTH-1:0] data_in_q; 73 | wire [DATA_WIDTH-1:0] data_out_q; 74 | 75 | //used for qmax table reading & writing 76 | reg [5:0] addrr_qmax; 77 | reg [5:0] addrw_qmax; 78 | reg rflag_qmax; //0 or 1 79 | reg wflag_qmax; //0 or 1 80 | reg [DATA_WIDTH-1:0] data_in_qmax; 81 | wire [DATA_WIDTH-1:0] data_out_qmax; 82 | 83 | //used for r table reading 84 | reg [ADDR_WIDTH-1:0] addr_r; 85 | reg rflag_r; //0 or 1 86 | wire [DATA_WIDTH-1:0] data_out_r; 87 | localparam sf = 2.0**-4.0; 88 | //--------------stage 1: state transition and table read addresses----------------- 89 | always @(posedge clk) begin 90 | //initialize state and action; reset has priority (if/else) so the state update below cannot override these values 91 | if (rst) begin 92 | s<=6'b000_000; 93 | current_s<=6'b000000; 94 | nexts<=6'b000000; 95 | alpha<=8'b0000_0010; //0.125 with the 2.0**-4.0 scaling factor (xxxx.xxxx fixed point) 96 | gamma<=8'b0000_0010; 97 | end else begin 98 | 99 | //calculate 1-a and a*g 100 | //scaling factor=2.0**-4.0 _ 101 | ag <= alpha*gamma; 102 | oneminusa <= 8'b0001_0000 - alpha; 103 | 104 | //locate next state: stay in place when the move would cross a wall. The guards read s directly because sx/sy lag s by one clock 105 | sx<=s[5:3];sy<=s[2:0]; 106 | if (s[5:3]==3'b000 && action==2'b00) begin //left wall 107 | nexts<=s; 108 | end 109 | else if (s[2:0]==3'b000 && action==2'b01) begin //up wall 110 | nexts<=s; 111 | end 112 | else if (s[5:3]==3'b111 && action==2'b10) begin //right wall 113 | nexts<=s; 114 | end 115 | else if (s[2:0]==3'b111 && action==2'b11) begin //down wall 116 | nexts<=s; 117 | end 118 | else begin 119 | case 
(action) 120 | 2'b00: nexts<=s-6'b001000;//to the left by 1 121 | 2'b01: nexts<=s-6'b000001;//to the up by 1 122 | 2'b10: nexts<=s+6'b001000;//to the right by 1 123 | 2'b11: nexts<=s+6'b000001;//to the down by 1 124 | //default: 125 | endcase 126 | //nexts<={sx,sy}; 127 | end 128 | 129 | //get address for q and r and qmax 130 | addrr_q<={s,action}; 131 | addr_r<={s,action}; 132 | addrr_qmax<=nexts; 133 | 134 | //wait and transit the state 135 | current_s<=s; 136 | current_s1<=current_s; 137 | current_a<=action; 138 | current_a1<=current_a; 139 | s<=nexts; 140 | end 141 | end 142 | 143 | 144 | //--------------stage 2----------------- 145 | always @(posedge clk) begin 146 | //locate q value from q table, save in q register 147 | rflag_q<=1; 148 | q<=data_out_q; 149 | q1<=q; 150 | 151 | rflag_r<=1; 152 | r<=data_out_r; 153 | r1<=r; 154 | 155 | //locate Qmax at next state from Qmax table 156 | rflag_qmax<=1; 157 | qmax<=data_out_qmax; 158 | 159 | current_s2<=current_s1; 160 | current_a2<=current_a1; 161 | 162 | end 163 | 164 | //--------------stage 3----------------- 165 | //always @(qmax or r or q or ag or oneminusa) begin 166 | 167 | /*reg [23:0] sum_part1; 168 | reg [23:0] sum_part2; 169 | reg [23:0] sum_part3; 170 | 171 | always@(posedge clk) 172 | begin 173 | sum_part1 <= alpha*r1; 174 | sum_part2 <= oneminusa*q1; 175 | sum_part3 <= ag*qmax; 176 | end 177 | 178 | 179 | always @(posedge clk) begin 180 | //calculations of q learning function 181 | //adder 182 | sum <= sum_part1 + sum_part2 + sum_part3; 183 | 184 | current_s3<=current_s2; 185 | current_a3<=current_a2; 186 | end*/ 187 | always @(posedge clk) begin 188 | //calculations of q learning function 189 | //adder 190 | sum <= alpha*r1 + oneminusa*q1 + ag*qmax; 191 | //sum <= alpha*r1*2**(-4) + oneminusa*q1*2**(-4) + ag*qmax*2**(-8); 192 | 193 | current_s3<=current_s2; 194 | current_a3<=current_a2; 195 | 196 | end 197 | 198 | 199 | 200 | //--------------stage 4----------------- 201 | //always @(sum) begin 202 | 
always @(posedge clk) begin 203 | // if(ce) begin 204 | //write back to qmax table when the new value exceeds the q value read in stage 2 205 | if (sum>q)begin //NOTE(review): wflag_qmax is only ever set to 1 in this block and is never cleared - confirm that is intended 206 | wflag_qmax<=1; 207 | addrw_qmax<=current_s3; 208 | data_in_qmax<=sum; 209 | end 210 | //write back to q table 211 | wflag_q<=1; 212 | addrw_q<={current_s3,current_a3}; 213 | data_in_q<=sum; 214 | //stop the pipeline if reached end state 215 | //if (current_s3 == 6'b111111) begin 216 | // $finish; 217 | //end 218 | //end 219 | end 220 | 221 | qtable qt0( 222 | .i_clk(clk), 223 | .i_rst(rst), 224 | .i_addr_r(addrr_q), 225 | .i_addr_w(addrw_q), 226 | .i_read_en(rflag_q), 227 | .i_write_en(wflag_q), 228 | .i_data(data_in_q), 229 | .o_data(data_out_q)); 230 | 231 | qmaxtable qmaxt0( 232 | .i_clk(clk), 233 | .i_rst(rst), 234 | .i_addr_r(addrr_qmax), 235 | .i_addr_w(addrw_qmax), 236 | .i_read_en(rflag_qmax), 237 | .i_write_en(wflag_qmax), 238 | .i_data(data_in_qmax), 239 | .o_data(data_out_qmax)); 240 | 241 | rtable rt0( 242 | .i_clk(clk), 243 | .i_addr(addr_r), 244 | .i_read(rflag_r), 245 | .o_data(data_out_r)); 246 | 247 | /* floating_point_0 mult ( 248 | .aclk(clk), // input wire aclk 249 | .s_axis_a_tvalid(1'b0), // input wire s_axis_a_tvalid 250 | .s_axis_a_tdata(alpha), // input wire [31 : 0] s_axis_a_tdata 251 | .s_axis_b_tvalid(1'b0), // input wire s_axis_b_tvalid 252 | .s_axis_b_tdata(gamma), // input wire [31 : 0] s_axis_b_tdata 253 | .m_axis_result_tvalid(agvalid), // output wire m_axis_result_tvalid 254 | .m_axis_result_tdata(ag) // output wire [31 : 0] m_axis_result_tdata 255 | );*/ 256 | 257 | endmodule 258 | -------------------------------------------------------------------------------- /pipeline_power_routed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 2 | ---------------------------------------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 17:16:25 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_power -file pipeline_power_routed.rpt -pb pipeline_power_summary_routed.pb -rpx pipeline_power_routed.rpx 7 | | Design : pipeline 8 | | Device : xc7vx485tffg1157-1 9 | | Design State : routed 10 | | Grade : commercial 11 | | Process : typical 12 | | Characterization : Production 13 | ---------------------------------------------------------------------------------------------------------------------------------------------- 14 | 15 | Power Report 16 | 17 | Table of Contents 18 | ----------------- 19 | 1. Summary 20 | 1.1 On-Chip Components 21 | 1.2 Power Supply Summary 22 | 1.3 Confidence Level 23 | 2. Settings 24 | 2.1 Environment 25 | 2.2 Clock Constraints 26 | 3. 
Detailed Reports 27 | 3.1 By Hierarchy 28 | 29 | 1. Summary 30 | ---------- 31 | 32 | +--------------------------+--------------+ 33 | | Total On-Chip Power (W) | 0.298 | 34 | | Design Power Budget (W) | Unspecified* | 35 | | Power Budget Margin (W) | NA | 36 | | Dynamic (W) | 0.055 | 37 | | Device Static (W) | 0.243 | 38 | | Effective TJA (C/W) | 1.4 | 39 | | Max Ambient (C) | 84.6 | 40 | | Junction Temperature (C) | 25.4 | 41 | | Confidence Level | Medium | 42 | | Setting File | --- | 43 | | Simulation Activity File | --- | 44 | | Design Nets Matched | NA | 45 | +--------------------------+--------------+ 46 | * Specify Design Power Budget using, set_operating_conditions -design_power_budget 47 | 48 | 49 | 1.1 On-Chip Components 50 | ---------------------- 51 | 52 | +-------------------------+-----------+----------+-----------+-----------------+ 53 | | On-Chip | Power (W) | Used | Available | Utilization (%) | 54 | +-------------------------+-----------+----------+-----------+-----------------+ 55 | | Clocks | 0.002 | 3 | --- | --- | 56 | | Slice Logic | 0.002 | 417 | --- | --- | 57 | | LUT as Logic | 0.001 | 79 | 303600 | 0.03 | 58 | | Register | <0.001 | 233 | 607200 | 0.04 | 59 | | CARRY4 | <0.001 | 20 | 75900 | 0.03 | 60 | | LUT as Shift Register | <0.001 | 6 | 130800 | <0.01 | 61 | | Others | 0.000 | 10 | --- | --- | 62 | | Signals | 0.004 | 720 | --- | --- | 63 | | Block RAM | 0.007 | 1 | 1030 | 0.10 | 64 | | DSPs | 0.003 | 6 | 2800 | 0.21 | 65 | | I/O | 0.037 | 51 | 600 | 8.50 | 66 | | Static Power | 0.243 | | | | 67 | | Total | 0.298 | | | | 68 | +-------------------------+-----------+----------+-----------+-----------------+ 69 | 70 | 71 | 1.2 Power Supply Summary 72 | ------------------------ 73 | 74 | +-----------+-------------+-----------+-------------+------------+ 75 | | Source | Voltage (V) | Total (A) | Dynamic (A) | Static (A) | 76 | +-----------+-------------+-----------+-------------+------------+ 77 | | Vccint | 1.000 | 0.152 | 0.018 | 0.134 | 
78 | | Vccaux | 1.800 | 0.041 | 0.003 | 0.038 | 79 | | Vcco33 | 3.300 | 0.000 | 0.000 | 0.000 | 80 | | Vcco25 | 2.500 | 0.000 | 0.000 | 0.000 | 81 | | Vcco18 | 1.800 | 0.019 | 0.018 | 0.001 | 82 | | Vcco15 | 1.500 | 0.000 | 0.000 | 0.000 | 83 | | Vcco135 | 1.350 | 0.000 | 0.000 | 0.000 | 84 | | Vcco12 | 1.200 | 0.000 | 0.000 | 0.000 | 85 | | Vccaux_io | 1.800 | 0.000 | 0.000 | 0.000 | 86 | | Vccbram | 1.000 | 0.003 | 0.000 | 0.003 | 87 | | MGTAVcc | 1.000 | 0.000 | 0.000 | 0.000 | 88 | | MGTAVtt | 1.200 | 0.000 | 0.000 | 0.000 | 89 | | MGTVccaux | 1.800 | 0.000 | 0.000 | 0.000 | 90 | | MGTZVccl | 1.075 | 0.000 | 0.000 | 0.000 | 91 | | MGTZAVcc | 1.075 | 0.000 | 0.000 | 0.000 | 92 | | MGTZVcch | 1.800 | 0.000 | 0.000 | 0.000 | 93 | | Vccadc | 1.800 | 0.020 | 0.000 | 0.020 | 94 | +-----------+-------------+-----------+-------------+------------+ 95 | 96 | 97 | 1.3 Confidence Level 98 | -------------------- 99 | 100 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 101 | | User Input Data | Confidence | Details | Action | 102 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 103 | | Design implementation state | High | Design is routed | | 104 | | Clock nodes activity | High | User specified more than 95% of clocks | | 105 | | I/O nodes activity | Medium | More than 5% of inputs are missing user specification | Provide missing input activity with simulation results or by editing the "By Resource Type -> I/Os" view | 106 | | Internal nodes activity | Medium | User specified less than 25% of internal nodes | Provide missing internal nodes activity with simulation results or by editing the "By Resource Type" views | 107 | | Device models | High | Device 
models are Production | | 108 | | | | | | 109 | | Overall confidence level | Medium | | | 110 | +-----------------------------+------------+-------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ 111 | 112 | 113 | 2. Settings 114 | ----------- 115 | 116 | 2.1 Environment 117 | --------------- 118 | 119 | +-----------------------+--------------------------+ 120 | | Ambient Temp (C) | 25.0 | 121 | | ThetaJA (C/W) | 1.4 | 122 | | Airflow (LFM) | 250 | 123 | | Heat Sink | medium (Medium Profile) | 124 | | ThetaSA (C/W) | 2.4 | 125 | | Board Selection | medium (10"x10") | 126 | | # of Board Layers | 12to15 (12 to 15 Layers) | 127 | | Board Temperature (C) | 25.0 | 128 | +-----------------------+--------------------------+ 129 | 130 | 131 | 2.2 Clock Constraints 132 | --------------------- 133 | 134 | +-------+--------+-----------------+ 135 | | Clock | Domain | Constraint (ns) | 136 | +-------+--------+-----------------+ 137 | | clk | clk | 10.0 | 138 | +-------+--------+-----------------+ 139 | 140 | 141 | 3. Detailed Reports 142 | ------------------- 143 | 144 | 3.1 By Hierarchy 145 | ---------------- 146 | 147 | +----------+-----------+ 148 | | Name | Power (W) | 149 | +----------+-----------+ 150 | | pipeline | 0.055 | 151 | | qmaxt0 | 0.004 | 152 | | qt0 | 0.004 | 153 | +----------+-----------+ 154 | 155 | 156 | -------------------------------------------------------------------------------- /pipeline_utilization_placed.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. 
2 | ------------------------------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2019.1 (win64) Build 2552052 Fri May 24 14:49:42 MDT 2019 4 | | Date : Sat Sep 14 17:15:21 2019 5 | | Host : DESKTOP-18L2VEF running 64-bit major release (build 9200) 6 | | Command : report_utilization -file pipeline_utilization_placed.rpt -pb pipeline_utilization_placed.pb 7 | | Design : pipeline 8 | | Device : 7vx485tffg1157-1 9 | | Design State : Fully Placed 10 | ------------------------------------------------------------------------------------------------------------- 11 | 12 | Utilization Design Information 13 | 14 | Table of Contents 15 | ----------------- 16 | 1. Slice Logic 17 | 1.1 Summary of Registers by Type 18 | 2. Slice Logic Distribution 19 | 3. Memory 20 | 4. DSP 21 | 5. IO and GT Specific 22 | 6. Clocking 23 | 7. Specific Feature 24 | 8. Primitives 25 | 9. Black Boxes 26 | 10. Instantiated Netlists 27 | 28 | 1. Slice Logic 29 | -------------- 30 | 31 | +----------------------------+------+-------+-----------+-------+ 32 | | Site Type | Used | Fixed | Available | Util% | 33 | +----------------------------+------+-------+-----------+-------+ 34 | | Slice LUTs | 85 | 0 | 303600 | 0.03 | 35 | | LUT as Logic | 79 | 0 | 303600 | 0.03 | 36 | | LUT as Memory | 6 | 0 | 130800 | <0.01 | 37 | | LUT as Distributed RAM | 0 | 0 | | | 38 | | LUT as Shift Register | 6 | 0 | | | 39 | | Slice Registers | 233 | 0 | 607200 | 0.04 | 40 | | Register as Flip Flop | 233 | 0 | 607200 | 0.04 | 41 | | Register as Latch | 0 | 0 | 607200 | 0.00 | 42 | | F7 Muxes | 0 | 0 | 151800 | 0.00 | 43 | | F8 Muxes | 0 | 0 | 75900 | 0.00 | 44 | +----------------------------+------+-------+-----------+-------+ 45 | 46 | 47 | 1.1 Summary of Registers by Type 48 | -------------------------------- 49 | 50 | +-------+--------------+-------------+--------------+ 51 | | Total | Clock Enable | Synchronous | Asynchronous | 52 | 
+-------+--------------+-------------+--------------+ 53 | | 0 | _ | - | - | 54 | | 0 | _ | - | Set | 55 | | 0 | _ | - | Reset | 56 | | 0 | _ | Set | - | 57 | | 0 | _ | Reset | - | 58 | | 0 | Yes | - | - | 59 | | 0 | Yes | - | Set | 60 | | 0 | Yes | - | Reset | 61 | | 0 | Yes | Set | - | 62 | | 233 | Yes | Reset | - | 63 | +-------+--------------+-------------+--------------+ 64 | 65 | 66 | 2. Slice Logic Distribution 67 | --------------------------- 68 | 69 | +--------------------------------------------+------+-------+-----------+-------+ 70 | | Site Type | Used | Fixed | Available | Util% | 71 | +--------------------------------------------+------+-------+-----------+-------+ 72 | | Slice | 68 | 0 | 75900 | 0.09 | 73 | | SLICEL | 44 | 0 | | | 74 | | SLICEM | 24 | 0 | | | 75 | | LUT as Logic | 79 | 0 | 303600 | 0.03 | 76 | | using O5 output only | 0 | | | | 77 | | using O6 output only | 12 | | | | 78 | | using O5 and O6 | 67 | | | | 79 | | LUT as Memory | 6 | 0 | 130800 | <0.01 | 80 | | LUT as Distributed RAM | 0 | 0 | | | 81 | | LUT as Shift Register | 6 | 0 | | | 82 | | using O5 output only | 0 | | | | 83 | | using O6 output only | 4 | | | | 84 | | using O5 and O6 | 2 | | | | 85 | | Slice Registers | 233 | 0 | 607200 | 0.04 | 86 | | Register driven from within the Slice | 24 | | | | 87 | | Register driven from outside the Slice | 209 | | | | 88 | | LUT in front of the register is unused | 179 | | | | 89 | | LUT in front of the register is used | 30 | | | | 90 | | Unique Control Sets | 2 | | 75900 | <0.01 | 91 | +--------------------------------------------+------+-------+-----------+-------+ 92 | * Note: Available Control Sets calculated as Slice Registers / 8, Review the Control Sets Report for more information regarding control sets. 93 | 94 | 95 | 3. 
Memory 96 | --------- 97 | 98 | +-------------------+------+-------+-----------+-------+ 99 | | Site Type | Used | Fixed | Available | Util% | 100 | +-------------------+------+-------+-----------+-------+ 101 | | Block RAM Tile | 1 | 0 | 1030 | 0.10 | 102 | | RAMB36/FIFO* | 0 | 0 | 1030 | 0.00 | 103 | | RAMB18 | 2 | 0 | 2060 | 0.10 | 104 | | RAMB18E1 only | 2 | | | | 105 | +-------------------+------+-------+-----------+-------+ 106 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1 107 | 108 | 109 | 4. DSP 110 | ------ 111 | 112 | +----------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------+------+-------+-----------+-------+ 115 | | DSPs | 6 | 0 | 2800 | 0.21 | 116 | | DSP48E1 only | 6 | | | | 117 | +----------------+------+-------+-----------+-------+ 118 | 119 | 120 | 5. 
IO and GT Specific 121 | --------------------- 122 | 123 | +-----------------------------+------+-------+-----------+-------+ 124 | | Site Type | Used | Fixed | Available | Util% | 125 | +-----------------------------+------+-------+-----------+-------+ 126 | | Bonded IOB | 51 | 0 | 600 | 8.50 | 127 | | IOB Master Pads | 25 | | | | 128 | | IOB Slave Pads | 24 | | | | 129 | | Bonded IPADs | 0 | 0 | 62 | 0.00 | 130 | | Bonded OPADs | 0 | 0 | 40 | 0.00 | 131 | | PHY_CONTROL | 0 | 0 | 14 | 0.00 | 132 | | PHASER_REF | 0 | 0 | 14 | 0.00 | 133 | | OUT_FIFO | 0 | 0 | 56 | 0.00 | 134 | | IN_FIFO | 0 | 0 | 56 | 0.00 | 135 | | IDELAYCTRL | 0 | 0 | 14 | 0.00 | 136 | | IBUFDS | 0 | 0 | 576 | 0.00 | 137 | | GTXE2_COMMON | 0 | 0 | 5 | 0.00 | 138 | | GTXE2_CHANNEL | 0 | 0 | 20 | 0.00 | 139 | | PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 56 | 0.00 | 140 | | PHASER_IN/PHASER_IN_PHY | 0 | 0 | 56 | 0.00 | 141 | | IDELAYE2/IDELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 142 | | ODELAYE2/ODELAYE2_FINEDELAY | 0 | 0 | 700 | 0.00 | 143 | | IBUFDS_GTE2 | 0 | 0 | 10 | 0.00 | 144 | | ILOGIC | 0 | 0 | 600 | 0.00 | 145 | | OLOGIC | 0 | 0 | 600 | 0.00 | 146 | +-----------------------------+------+-------+-----------+-------+ 147 | 148 | 149 | 6. Clocking 150 | ----------- 151 | 152 | +------------+------+-------+-----------+-------+ 153 | | Site Type | Used | Fixed | Available | Util% | 154 | +------------+------+-------+-----------+-------+ 155 | | BUFGCTRL | 1 | 0 | 32 | 3.13 | 156 | | BUFIO | 0 | 0 | 56 | 0.00 | 157 | | MMCME2_ADV | 0 | 0 | 14 | 0.00 | 158 | | PLLE2_ADV | 0 | 0 | 14 | 0.00 | 159 | | BUFMRCE | 0 | 0 | 28 | 0.00 | 160 | | BUFHCE | 0 | 0 | 168 | 0.00 | 161 | | BUFR | 0 | 0 | 56 | 0.00 | 162 | +------------+------+-------+-----------+-------+ 163 | 164 | 165 | 7. 
Specific Feature 166 | ------------------- 167 | 168 | +-------------+------+-------+-----------+-------+ 169 | | Site Type | Used | Fixed | Available | Util% | 170 | +-------------+------+-------+-----------+-------+ 171 | | BSCANE2 | 0 | 0 | 4 | 0.00 | 172 | | CAPTUREE2 | 0 | 0 | 1 | 0.00 | 173 | | DNA_PORT | 0 | 0 | 1 | 0.00 | 174 | | EFUSE_USR | 0 | 0 | 1 | 0.00 | 175 | | FRAME_ECCE2 | 0 | 0 | 1 | 0.00 | 176 | | ICAPE2 | 0 | 0 | 2 | 0.00 | 177 | | PCIE_2_1 | 0 | 0 | 4 | 0.00 | 178 | | STARTUPE2 | 0 | 0 | 1 | 0.00 | 179 | | XADC | 0 | 0 | 1 | 0.00 | 180 | +-------------+------+-------+-----------+-------+ 181 | 182 | 183 | 8. Primitives 184 | ------------- 185 | 186 | +----------+------+---------------------+ 187 | | Ref Name | Used | Functional Category | 188 | +----------+------+---------------------+ 189 | | FDRE | 233 | Flop & Latch | 190 | | LUT4 | 71 | LUT | 191 | | OBUF | 48 | IO | 192 | | LUT3 | 47 | LUT | 193 | | LUT2 | 21 | LUT | 194 | | CARRY4 | 20 | CarryLogic | 195 | | SRL16E | 8 | Distributed Memory | 196 | | DSP48E1 | 6 | Block Arithmetic | 197 | | LUT6 | 3 | LUT | 198 | | LUT5 | 3 | LUT | 199 | | IBUF | 3 | IO | 200 | | RAMB18E1 | 2 | Block Memory | 201 | | LUT1 | 1 | LUT | 202 | | BUFG | 1 | Clock | 203 | +----------+------+---------------------+ 204 | 205 | 206 | 9. Black Boxes 207 | -------------- 208 | 209 | +----------+------+ 210 | | Ref Name | Used | 211 | +----------+------+ 212 | 213 | 214 | 10. 
Instantiated Netlists 215 | ------------------------- 216 | 217 | +----------+------+ 218 | | Ref Name | Used | 219 | +----------+------+ 220 | 221 | 222 | -------------------------------------------------------------------------------- /pipqrl.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatherineMeng/q-learning-accel-fpga/d995e208ccfdf84e823339822ee91c5e332af7c4/pipqrl.PNG -------------------------------------------------------------------------------- /qmaxtable.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | //qmax values stored on BRAM 3 | //width depends on range of q value, depth depends on number of states 4 | module qmaxtable #(parameter ADDR_WIDTH = 6, DATA_WIDTH = 32, DEPTH = 64) ( 5 | input wire i_clk, 6 | input wire i_rst, 7 | input wire [ADDR_WIDTH-1:0] i_addr_r, 8 | input wire [ADDR_WIDTH-1:0] i_addr_w, 9 | input wire i_read_en, 10 | input wire i_write_en, 11 | input wire [DATA_WIDTH-1:0] i_data, 12 | output reg [DATA_WIDTH-1:0] o_data 13 | //output reg [DATA_WIDTH-1:0] o_data2 14 | ); 15 | integer i; 16 | reg [DATA_WIDTH-1:0] memory_array [0:DEPTH-1]; 17 | 18 | always @ (posedge i_clk) 19 | begin 20 | /* if (i_rst) begin 21 | memory_array[0] <= 0; 22 | for (i=0;i11 or (8,7)=>10 gets big reward 48 | 8'b110_111_11: o_data<= {DATA_WIDTH{1'b1}}; 49 | 8'b111_110_10: o_data<= {DATA_WIDTH{1'b1}}; 50 | //... depends on the dataset?? 
51 | 52 | 53 | default : o_data<= {DATA_WIDTH{1'b0}}; //others no reward 54 | endcase 55 | $display("r read %02h from: %08b\n", o_data, i_addr); 56 | end 57 | endmodule -------------------------------------------------------------------------------- /testbench.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 09/02/2019 10:56:26 AM 7 | // Design Name: 8 | // Module Name: testbench 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | module testbench( ); 22 | reg i_clk; 23 | reg i_rst; 24 | reg [1:0] a; 25 | reg[31:0] al; //xxxx.xxxx 0000_0010=0.125, fixed point representation for alpha and gamma 26 | reg[31:0] ga; 27 | //reg[7:0] alpha_in; 28 | //reg[7:0] gamma_in; 29 | wire [47:0] out; 30 | 31 | initial begin 32 | i_clk<=1; 33 | //#5 34 | i_rst<=1; 35 | //al<=8'b0000_0010; 36 | //ga<=8'b0000_0010; 37 | //al<=8'b00000010; 38 | //ga<=8'b00111111010011001100110011001101; 39 | 40 | #5 i_rst<=0; 41 | forever begin 42 | #10 a<=$urandom%4; 43 | end 44 | //alpha_in=8'b0000_0001; 45 | //gamma_in=8'b0000_0001; 46 | end 47 | 48 | always begin 49 | #5 i_clk=~i_clk; 50 | 51 | end 52 | 53 | pipeline test( 54 | .clk(i_clk), 55 | .rst(i_rst), 56 | .action(a), 57 | //.alpha(al), 58 | //.gamma(ga), 59 | //.cina(ina), 60 | //.cinb(inb), 61 | //.alpha(alpha_in), 62 | //.gamma(gamma_in), 63 | .sum(out)); 64 | 65 | endmodule 66 | `timescale 1ns / 1ps 67 | ////////////////////////////////////////////////////////////////////////////////// 68 | // Company: 69 | // Engineer: 70 | // 71 | // Create Date: 09/02/2019 10:56:26 AM 72 | // 
// Design Name:
// Module Name: testbench
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////
// Simulation-only testbench: drives the `pipeline` module with a free-running
// clock, a short reset pulse and a new random 2-bit action every 10 time units.
// NOTE(review): a module named `testbench` with the same contents is also
// defined earlier in this file - confirm which copy is meant to be kept.
module testbench( );

    // DUT connections
    reg         i_clk;
    reg         i_rst;
    reg  [1:0]  a;      // action fed to the pipeline
    wire [47:0] out;    // `sum` output of the pipeline

    // xxxx.xxxx 0000_0010=0.125, fixed point representation for alpha and gamma.
    // Declared but not driven or connected anywhere in this testbench.
    reg  [31:0] al;
    reg  [31:0] ga;

    // Reset and stimulus (single process, so event order matches the clock
    // start-up and the random sequence stays tied to this process):
    //   t = 0 : clock driven high, reset asserted
    //   t = 5 : reset released
    //   t = 15, 25, 35, ... : new random action in 0..3
    initial begin
        i_clk <= 1;
        i_rst <= 1;

        #5 i_rst <= 0;

        forever
            #10 a <= $urandom%4;
    end

    // Free-running clock: inverted every 5 time units.
    always
        #5 i_clk = ~i_clk;

    // Device under test; only clk, rst, action and sum are connected.
    pipeline test(
        .clk    (i_clk),
        .rst    (i_rst),
        .action (a),
        .sum    (out));

endmodule
// --------------------------------------------------------------------------------